383 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			383 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // This class represents the Lexer for tablegen files.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
 | |
| #define LLVM_LIB_TABLEGEN_TGLEXER_H
 | |
| 
 | |
| #include "llvm/ADT/StringRef.h"
 | |
| #include "llvm/ADT/StringSet.h"
 | |
| #include "llvm/Support/DataTypes.h"
 | |
| #include "llvm/Support/SMLoc.h"
 | |
| #include <cassert>
 | |
| #include <memory>
 | |
| #include <set>
 | |
| #include <string>
 | |
| #include <vector>
 | |
| 
 | |
| namespace llvm {
 | |
| template <typename T> class ArrayRef;
 | |
| class SourceMgr;
 | |
| class Twine;
 | |
| 
 | |
namespace tgtok {
/// Kinds of tokens known to the TableGen lexer.  The enumerators are grouped
/// by category; their relative order (and therefore their numeric values) is
/// part of the existing interface and must not be rearranged.
enum TokKind {
  // Markers
  Eof,
  Error,

  // Tokens with no info.
  minus,     // -
  plus,      // +
  l_square,  // [
  r_square,  // ]
  l_brace,   // {
  r_brace,   // }
  l_paren,   // (
  r_paren,   // )
  less,      // <
  greater,   // >
  colon,     // :
  semi,      // ;
  comma,     // ,
  dot,       // .
  equal,     // =
  question,  // ?
  paste,     // #
  dotdotdot, // ...

  // Reserved keywords.  'ElseKW' carries the KW suffix so that it does not
  // clash with 'Else' below, which stands for the preprocessor's #else.
  Assert,
  Bit,
  Bits,
  Class,
  Code,
  Dag,
  Def,
  Defm,
  Defset,
  Defvar,
  ElseKW,
  FalseKW,
  Field,
  Foreach,
  If,
  In,
  Include,
  Int,
  Let,
  List,
  MultiClass,
  String,
  Then,
  TrueKW,

  // Bang operators.
  XConcat,
  XADD,
  XSUB,
  XMUL,
  XNOT,
  XAND,
  XOR,
  XXOR,
  XSRA,
  XSRL,
  XSHL,
  XListConcat,
  XListSplat,
  XStrConcat,
  XInterleave,
  XSubstr,
  XFind,
  XCast,
  XSubst,
  XForEach,
  XFilter,
  XFoldl,
  XHead,
  XTail,
  XSize,
  XEmpty,
  XIf,
  XCond,
  XEq,
  XIsA,
  XDag,
  XNe,
  XLe,
  XLt,
  XGe,
  XGt,
  XSetDagOp,
  XGetDagOp,
  XExists,

  // Boolean literals.
  TrueVal,
  FalseVal,

  // Integer value.
  IntVal,

  // Binary constant.  Note that these are sized according to the number of
  // bits given.
  BinaryIntVal,

  // String valued tokens.
  Id,
  StrVal,
  VarName,
  CodeFragment,

  // Preprocessing tokens.  These are used internally by the lexer only and
  // are never returned as a result of Lex().
  Ifdef,
  Ifndef,
  Else,
  Endif,
  Define
};
} // end namespace tgtok
 | |
| 
 | |
| /// TGLexer - TableGen Lexer class.
 | |
| class TGLexer {
 | |
|   SourceMgr &SrcMgr;
 | |
| 
 | |
|   const char *CurPtr = nullptr;
 | |
|   StringRef CurBuf;
 | |
| 
 | |
|   // Information about the current token.
 | |
|   const char *TokStart = nullptr;
 | |
|   tgtok::TokKind CurCode = tgtok::TokKind::Eof;
 | |
|   std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
 | |
|   int64_t CurIntVal = 0; // This is valid for IntVal.
 | |
| 
 | |
|   /// CurBuffer - This is the current buffer index we're lexing from as managed
 | |
|   /// by the SourceMgr object.
 | |
|   unsigned CurBuffer = 0;
 | |
| 
 | |
| public:
 | |
|   typedef std::set<std::string> DependenciesSetTy;
 | |
| 
 | |
| private:
 | |
|   /// Dependencies - This is the list of all included files.
 | |
|   DependenciesSetTy Dependencies;
 | |
| 
 | |
| public:
 | |
|   TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
 | |
| 
 | |
|   tgtok::TokKind Lex() {
 | |
|     return CurCode = LexToken(CurPtr == CurBuf.begin());
 | |
|   }
 | |
| 
 | |
|   const DependenciesSetTy &getDependencies() const {
 | |
|     return Dependencies;
 | |
|   }
 | |
| 
 | |
|   tgtok::TokKind getCode() const { return CurCode; }
 | |
| 
 | |
|   const std::string &getCurStrVal() const {
 | |
|     assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
 | |
|             CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
 | |
|            "This token doesn't have a string value");
 | |
|     return CurStrVal;
 | |
|   }
 | |
|   int64_t getCurIntVal() const {
 | |
|     assert(CurCode == tgtok::IntVal && "This token isn't an integer");
 | |
|     return CurIntVal;
 | |
|   }
 | |
|   std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
 | |
|     assert(CurCode == tgtok::BinaryIntVal &&
 | |
|            "This token isn't a binary integer");
 | |
|     return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
 | |
|   }
 | |
| 
 | |
|   SMLoc getLoc() const;
 | |
| 
 | |
| private:
 | |
|   /// LexToken - Read the next token and return its code.
 | |
|   tgtok::TokKind LexToken(bool FileOrLineStart = false);
 | |
| 
 | |
|   tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
 | |
|   tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
 | |
| 
 | |
|   int getNextChar();
 | |
|   int peekNextChar(int Index) const;
 | |
|   void SkipBCPLComment();
 | |
|   bool SkipCComment();
 | |
|   tgtok::TokKind LexIdentifier();
 | |
|   bool LexInclude();
 | |
|   tgtok::TokKind LexString();
 | |
|   tgtok::TokKind LexVarName();
 | |
|   tgtok::TokKind LexNumber();
 | |
|   tgtok::TokKind LexBracket();
 | |
|   tgtok::TokKind LexExclaim();
 | |
| 
 | |
|   // Process EOF encountered in LexToken().
 | |
|   // If EOF is met in an include file, then the method will update
 | |
|   // CurPtr, CurBuf and preprocessing include stack, and return true.
 | |
|   // If EOF is met in the top-level file, then the method will
 | |
|   // update and check the preprocessing include stack, and return false.
 | |
|   bool processEOF();
 | |
| 
 | |
|   // *** Structures and methods for preprocessing support ***
 | |
| 
 | |
|   // A set of macro names that are defined either via command line or
 | |
|   // by using:
 | |
|   //     #define NAME
 | |
|   StringSet<> DefinedMacros;
 | |
| 
 | |
|   // Each of #ifdef and #else directives has a descriptor associated
 | |
|   // with it.
 | |
|   //
 | |
|   // An ordered list of preprocessing controls defined by #ifdef/#else
 | |
|   // directives that are in effect currently is called preprocessing
 | |
|   // control stack.  It is represented as a vector of PreprocessorControlDesc's.
 | |
|   //
 | |
|   // The control stack is updated according to the following rules:
 | |
|   //
 | |
|   // For each #ifdef we add an element to the control stack.
 | |
|   // For each #else we replace the top element with a descriptor
 | |
|   // with an inverted IsDefined value.
 | |
|   // For each #endif we pop the top element from the control stack.
 | |
|   //
 | |
|   // When CurPtr reaches the current buffer's end, the control stack
 | |
|   // must be empty, i.e. #ifdef and the corresponding #endif
 | |
|   // must be located in the same file.
 | |
|   struct PreprocessorControlDesc {
 | |
|     // Either tgtok::Ifdef or tgtok::Else.
 | |
|     tgtok::TokKind Kind;
 | |
| 
 | |
|     // True, if the condition for this directive is true, false - otherwise.
 | |
|     // Examples:
 | |
|     //     #ifdef NAME       : true, if NAME is defined, false - otherwise.
 | |
|     //     ...
 | |
|     //     #else             : false, if NAME is defined, true - otherwise.
 | |
|     bool IsDefined;
 | |
| 
 | |
|     // Pointer into CurBuf to the beginning of the preprocessing directive
 | |
|     // word, e.g.:
 | |
|     //     #ifdef NAME
 | |
|     //      ^ - SrcPos
 | |
|     SMLoc SrcPos;
 | |
|   };
 | |
| 
 | |
|   // We want to disallow code like this:
 | |
|   //     file1.td:
 | |
|   //         #define NAME
 | |
|   //         #ifdef NAME
 | |
|   //         include "file2.td"
 | |
|   //     EOF
 | |
|   //     file2.td:
 | |
|   //         #endif
 | |
|   //     EOF
 | |
|   //
 | |
|   // To do this, we clear the preprocessing control stack on entry
 | |
|   // to each of the included file.  PrepIncludeStack is used to store
 | |
|   // preprocessing control stacks for the current file and all its
 | |
|   // parent files.  The back() element is the preprocessing control
 | |
|   // stack for the current file.
 | |
|   std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
 | |
|       PrepIncludeStack;
 | |
| 
 | |
|   // Validate that the current preprocessing control stack is empty,
 | |
|   // since we are about to exit a file, and pop the include stack.
 | |
|   //
 | |
|   // If IncludeStackMustBeEmpty is true, the include stack must be empty
 | |
|   // after the popping, otherwise, the include stack must not be empty
 | |
|   // after the popping.  Basically, the include stack must be empty
 | |
|   // only if we exit the "top-level" file (i.e. finish lexing).
 | |
|   //
 | |
|   // The method returns false, if the current preprocessing control stack
 | |
|   // is not empty (e.g. there is an unterminated #ifdef/#else),
 | |
|   // true - otherwise.
 | |
|   bool prepExitInclude(bool IncludeStackMustBeEmpty);
 | |
| 
 | |
|   // Look ahead for a preprocessing directive starting from CurPtr.  The caller
 | |
|   // must only call this method, if *(CurPtr - 1) is '#'.  If the method matches
 | |
|   // a preprocessing directive word followed by a whitespace, then it returns
 | |
|   // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
 | |
|   //
 | |
|   // CurPtr is not adjusted by this method.
 | |
|   tgtok::TokKind prepIsDirective() const;
 | |
| 
 | |
|   // Given a preprocessing token kind, adjusts CurPtr to the end
 | |
|   // of the preprocessing directive word.  Returns true, unless
 | |
|   // an unsupported token kind is passed in.
 | |
|   //
 | |
|   // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
 | |
|   // to avoid adjusting CurPtr before we are sure that '#' is followed
 | |
|   // by a preprocessing directive.  If it is not, then we fall back to
 | |
|   // tgtok::paste interpretation of '#'.
 | |
|   bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
 | |
| 
 | |
|   // The main "exit" point from the token parsing to preprocessor.
 | |
|   //
 | |
|   // The method is called for CurPtr, when prepIsDirective() returns
 | |
|   // true.  The first parameter matches the result of prepIsDirective(),
 | |
|   // denoting the actual preprocessor directive to be processed.
 | |
|   //
 | |
|   // If the preprocessing directive disables the tokens processing, e.g.:
 | |
|   //     #ifdef NAME // NAME is undefined
 | |
|   // then lexPreprocessor() enters the lines-skipping mode.
 | |
|   // In this mode, it does not parse any tokens, because the code under
 | |
|   // the #ifdef may not even be a correct tablegen code.  The preprocessor
 | |
|   // looks for lines containing other preprocessing directives, which
 | |
|   // may be prepended with whitespaces and C-style comments.  If the line
 | |
|   // does not contain a preprocessing directive, it is skipped completely.
 | |
|   // Otherwise, the preprocessing directive is processed by recursively
 | |
|   // calling lexPreprocessor().  The processing of the encountered
 | |
|   // preprocessing directives includes updating preprocessing control stack
 | |
|   // and adding new macros into DefinedMacros set.
 | |
|   //
 | |
|   // The second parameter controls whether lexPreprocessor() is called from
 | |
|   // LexToken() (true) or recursively from lexPreprocessor() (false).
 | |
|   //
 | |
|   // If ReturnNextLiveToken is true, the method returns the next
 | |
|   // LEX token following the current directive or following the end
 | |
|   // of the disabled preprocessing region corresponding to this directive.
 | |
|   // If ReturnNextLiveToken is false, the method returns the first parameter,
 | |
|   // unless there were errors encountered in the disabled preprocessing
 | |
|   // region - in this case, it returns tgtok::Error.
 | |
|   tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
 | |
|                                  bool ReturnNextLiveToken = true);
 | |
| 
 | |
|   // Worker method for lexPreprocessor() to skip lines after some
 | |
|   // preprocessing directive up to the buffer end or to the directive
 | |
|   // that re-enables token processing.  The method returns true
 | |
|   // upon processing the next directive that re-enables tokens
 | |
|   // processing.  False is returned if an error was encountered.
 | |
|   //
 | |
|   // Note that prepSkipRegion() calls lexPreprocessor() to process
 | |
|   // encountered preprocessing directives.  In this case, the second
 | |
|   // parameter to lexPreprocessor() is set to false.  Being passed
 | |
|   // false ReturnNextLiveToken, lexPreprocessor() must never call
 | |
|   // prepSkipRegion().  We assert this by passing ReturnNextLiveToken
 | |
|   // to prepSkipRegion() and checking that it is never set to false.
 | |
|   bool prepSkipRegion(bool MustNeverBeFalse);
 | |
| 
 | |
|   // Lex name of the macro after either #ifdef or #define.  We could have used
 | |
|   // LexIdentifier(), but it has special handling of "include" word, which
 | |
|   // could result in awkward diagnostic errors.  Consider:
 | |
|   // ----
 | |
|   // #ifdef include
 | |
|   // class ...
 | |
|   // ----
 | |
|   // LexIdentifier() will engage LexInclude(), which will complain about
 | |
|   // missing file with name "class".  Instead, prepLexMacroName() will treat
 | |
|   // "include" as a normal macro name.
 | |
|   //
 | |
|   // On entry, CurPtr points to the end of a preprocessing directive word.
 | |
|   // The method allows for whitespaces between the preprocessing directive
 | |
|   // and the macro name.  The allowed whitespaces are ' ' and '\t'.
 | |
|   //
 | |
|   // If the first non-whitespace symbol after the preprocessing directive
 | |
|   // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
 | |
|   // the method updates TokStart to the position of the first non-whitespace
 | |
|   // symbol, sets CurPtr to the position of the macro name's last symbol,
 | |
|   // and returns a string reference to the macro name.  Otherwise,
 | |
|   // TokStart is set to the first non-whitespace symbol after the preprocessing
 | |
|   // directive, and the method returns an empty string reference.
 | |
|   //
 | |
|   // In all cases, TokStart may be used to point to the word following
 | |
|   // the preprocessing directive.
 | |
|   StringRef prepLexMacroName();
 | |
| 
 | |
|   // Skip any whitespaces starting from CurPtr.  The method is used
 | |
|   // only in the lines-skipping mode to find the first non-whitespace
 | |
|   // symbol after or at CurPtr.  Allowed whitespaces are ' ', '\t', '\n'
 | |
|   // and '\r'.  The method skips C-style comments as well, because
 | |
|   // it is used to find the beginning of the preprocessing directive.
 | |
|   // If we do not handle C-style comments the following code would
 | |
|   // result in incorrect detection of a preprocessing directive:
 | |
|   //     /*
 | |
|   //     #ifdef NAME
 | |
|   //     */
 | |
|   // As long as we skip C-style comments, the following code is correctly
 | |
|   // recognized as a preprocessing directive:
 | |
|   //     /* first line comment
 | |
|   //        second line comment */ #ifdef NAME
 | |
|   //
 | |
|   // The method returns true upon reaching the first non-whitespace symbol
 | |
|   // or EOF, CurPtr is set to point to this symbol.  The method returns false,
 | |
|   // if an error occurred during skipping of a C-style comment.
 | |
|   bool prepSkipLineBegin();
 | |
| 
 | |
|   // Skip any whitespaces or comments after a preprocessing directive.
 | |
|   // The method returns true upon reaching either end of the line
 | |
|   // or end of the file.  If there is a multiline C-style comment
 | |
|   // after the preprocessing directive, the method skips
 | |
|   // the comment, so the final CurPtr may point to one of the next lines.
 | |
|   // The method returns false, if an error occurred during skipping
 | |
|   // C- or C++-style comment, or a non-whitespace symbol appears
 | |
|   // after the preprocessing directive.
 | |
|   //
 | |
|   // The method maybe called both during lines-skipping and tokens
 | |
|   // processing.  It actually verifies that only whitespaces or/and
 | |
|   // comments follow a preprocessing directive.
 | |
|   //
 | |
|   // After the execution of this mehod, CurPtr points either to new line
 | |
|   // symbol, buffer end or non-whitespace symbol following the preprocesing
 | |
|   // directive.
 | |
|   bool prepSkipDirectiveEnd();
 | |
| 
 | |
|   // Skip all symbols to the end of the line/file.
 | |
|   // The method adjusts CurPtr, so that it points to either new line
 | |
|   // symbol in the current line or the buffer end.
 | |
|   void prepSkipToLineEnd();
 | |
| 
 | |
|   // Return true, if the current preprocessor control stack is such that
 | |
|   // we should allow lexer to process the next token, false - otherwise.
 | |
|   //
 | |
|   // In particular, the method returns true, if all the #ifdef/#else
 | |
|   // controls on the stack have their IsDefined member set to true.
 | |
|   bool prepIsProcessingEnabled();
 | |
| 
 | |
|   // Report an error, if we reach EOF with non-empty preprocessing control
 | |
|   // stack.  This means there is no matching #endif for the previous
 | |
|   // #ifdef/#else.
 | |
|   void prepReportPreprocessorStackError();
 | |
| };
 | |
| 
 | |
| } // end namespace llvm
 | |
| 
 | |
| #endif
 |