forked from OSchip/llvm-project
				
			
		
			
				
	
	
		
			127 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			127 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			C++
		
	
	
	
//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains FormatTokenLexer, which tokenizes a source file
/// into a token stream suitable for ClangFormat.
///
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
 | 
						|
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
 | 
						|
 | 
						|
#include "Encoding.h"
 | 
						|
#include "FormatToken.h"
 | 
						|
#include "clang/Basic/SourceLocation.h"
 | 
						|
#include "clang/Basic/SourceManager.h"
 | 
						|
#include "clang/Format/Format.h"
 | 
						|
#include "llvm/ADT/MapVector.h"
 | 
						|
#include "llvm/Support/Regex.h"
 | 
						|
 | 
						|
#include <stack>
 | 
						|
 | 
						|
namespace clang {
 | 
						|
namespace format {
 | 
						|
 | 
						|
/// Lexing modes kept on FormatTokenLexer's state stack
/// (\c std::stack<LexerState>).
///
/// The enumerators keep their implicit values (0, 1, 2) and their order.
enum LexerState {
  /// Default mode.
  NORMAL,
  /// NOTE(review): presumably active while lexing inside a JavaScript
  /// template string -- confirm against handleTemplateStrings().
  TEMPLATE_STRING,
  /// NOTE(review): presumably signals that a token was stashed for later
  /// retrieval -- confirm against getStashedToken().
  TOKEN_STASHED,
};
 | 
						|
 | 
						|
class FormatTokenLexer {
 | 
						|
public:
 | 
						|
  FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
 | 
						|
                   const FormatStyle &Style, encoding::Encoding Encoding);
 | 
						|
 | 
						|
  ArrayRef<FormatToken *> lex();
 | 
						|
 | 
						|
  const AdditionalKeywords &getKeywords() { return Keywords; }
 | 
						|
 | 
						|
private:
 | 
						|
  void tryMergePreviousTokens();
 | 
						|
 | 
						|
  bool tryMergeLessLess();
 | 
						|
  bool tryMergeNSStringLiteral();
 | 
						|
  bool tryMergeJSPrivateIdentifier();
 | 
						|
  bool tryMergeCSharpStringLiteral();
 | 
						|
  bool tryMergeCSharpKeywordVariables();
 | 
						|
  bool tryMergeCSharpNullConditionals();
 | 
						|
  bool tryMergeCSharpDoubleQuestion();
 | 
						|
  bool tryTransformCSharpForEach();
 | 
						|
 | 
						|
  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
 | 
						|
 | 
						|
  // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
 | 
						|
  bool precedesOperand(FormatToken *Tok);
 | 
						|
 | 
						|
  bool canPrecedeRegexLiteral(FormatToken *Prev);
 | 
						|
 | 
						|
  // Tries to parse a JavaScript Regex literal starting at the current token,
 | 
						|
  // if that begins with a slash and is in a location where JavaScript allows
 | 
						|
  // regex literals. Changes the current token to a regex literal and updates
 | 
						|
  // its text if successful.
 | 
						|
  void tryParseJSRegexLiteral();
 | 
						|
 | 
						|
  // Handles JavaScript template strings.
 | 
						|
  //
 | 
						|
  // JavaScript template strings use backticks ('`') as delimiters, and allow
 | 
						|
  // embedding expressions nested in ${expr-here}. Template strings can be
 | 
						|
  // nested recursively, i.e. expressions can contain template strings in turn.
 | 
						|
  //
 | 
						|
  // The code below parses starting from a backtick, up to a closing backtick or
 | 
						|
  // an opening ${. It also maintains a stack of lexing contexts to handle
 | 
						|
  // nested template parts by balancing curly braces.
 | 
						|
  void handleTemplateStrings();
 | 
						|
 | 
						|
  void handleCSharpVerbatimAndInterpolatedStrings();
 | 
						|
 | 
						|
  void tryParsePythonComment();
 | 
						|
 | 
						|
  bool tryMerge_TMacro();
 | 
						|
 | 
						|
  bool tryMergeConflictMarkers();
 | 
						|
 | 
						|
  FormatToken *getStashedToken();
 | 
						|
 | 
						|
  FormatToken *getNextToken();
 | 
						|
 | 
						|
  FormatToken *FormatTok;
 | 
						|
  bool IsFirstToken;
 | 
						|
  std::stack<LexerState> StateStack;
 | 
						|
  unsigned Column;
 | 
						|
  unsigned TrailingWhitespace;
 | 
						|
  std::unique_ptr<Lexer> Lex;
 | 
						|
  const SourceManager &SourceMgr;
 | 
						|
  FileID ID;
 | 
						|
  const FormatStyle &Style;
 | 
						|
  IdentifierTable IdentTable;
 | 
						|
  AdditionalKeywords Keywords;
 | 
						|
  encoding::Encoding Encoding;
 | 
						|
  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
 | 
						|
  // Index (in 'Tokens') of the last token that starts a new line.
 | 
						|
  unsigned FirstInLineIndex;
 | 
						|
  SmallVector<FormatToken *, 16> Tokens;
 | 
						|
 | 
						|
  llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
 | 
						|
 | 
						|
  bool FormattingDisabled;
 | 
						|
 | 
						|
  llvm::Regex MacroBlockBeginRegex;
 | 
						|
  llvm::Regex MacroBlockEndRegex;
 | 
						|
 | 
						|
  void readRawToken(FormatToken &Tok);
 | 
						|
 | 
						|
  void resetLexer(unsigned Offset);
 | 
						|
};
 | 
						|
 | 
						|
} // namespace format
 | 
						|
} // namespace clang
 | 
						|
 | 
						|
#endif
 |