Add a new type of lexer: a raw lexer, which does not require a preprocessor

object in order to do its thing.

llvm-svn: 43084
This commit is contained in:
Chris Lattner 2007-10-17 20:41:00 +00:00
parent b3dac3f5d9
commit 02b436a05a
2 changed files with 75 additions and 25 deletions

View File

@ -56,11 +56,15 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// Lexer constructor - Create a new lexer object for the specified buffer
/// with the specified preprocessor managing the lexing process. This lexer
/// assumes that the associated file buffer and Preprocessor objects will
/// outlive it, so it doesn't take ownership of either of them.
Lexer::Lexer(SourceLocation fileloc, Preprocessor &pp, Lexer::Lexer(SourceLocation fileloc, Preprocessor &pp,
const char *BufStart, const char *BufEnd) const char *BufStart, const char *BufEnd)
: FileLoc(fileloc), PP(pp), Features(PP.getLangOptions()) { : FileLoc(fileloc), PP(&pp), Features(pp.getLangOptions()) {
SourceManager &SourceMgr = PP.getSourceManager(); SourceManager &SourceMgr = PP->getSourceManager();
unsigned InputFileID = SourceMgr.getPhysicalLoc(FileLoc).getFileID(); unsigned InputFileID = SourceMgr.getPhysicalLoc(FileLoc).getFileID();
const llvm::MemoryBuffer *InputFile = SourceMgr.getBuffer(InputFileID); const llvm::MemoryBuffer *InputFile = SourceMgr.getBuffer(InputFileID);
@ -95,9 +99,44 @@ Lexer::Lexer(SourceLocation fileloc, Preprocessor &pp,
LexingRawMode = false; LexingRawMode = false;
// Default to keeping comments if requested. // Default to keeping comments if requested.
KeepCommentMode = PP.getCommentRetentionState(); KeepCommentMode = PP->getCommentRetentionState();
} }
/// Lexer constructor (raw flavor) - Build a lexer that has no Preprocessor
/// attached: the PP pointer is left null.  An object created this way is only
/// suitable for calls to 'LexRawToken'.  The file buffer is assumed to outlive
/// the lexer, so the lexer does not take ownership of it.
Lexer::Lexer(SourceLocation fileloc, const LangOptions &features,
             const char *BufStart, const char *BufEnd)
  : FileLoc(fileloc), PP(0), Features(features) {
  // A raw lexer is never used for _Pragma handling.
  Is_PragmaLexer = false;

  InitCharacterInfo();

  // Lexing begins at the first character of the supplied range.
  BufferStart = BufStart;
  BufferEnd = BufEnd;
  BufferPtr = BufStart;

  assert(*BufferEnd == 0 &&
         "We assume that the input buffer has a null character at the end"
         " to simplify lexing!");

  // Raw mode is permanently on for this lexer, and comments are never kept
  // while in raw mode.
  LexingRawMode = true;
  KeepCommentMode = false;

  // The beginning of the buffer counts as the beginning of a line.
  IsAtStartOfLine = true;

  // No '#' directive and no #include filename is being parsed yet.
  ParsingPreprocessorDirective = false;
  ParsingFilename = false;
}
/// Stringify - Convert the specified string into a C string, with surrounding /// Stringify - Convert the specified string into a C string, with surrounding
/// ""'s, and with escaped \ and " characters. /// ""'s, and with escaped \ and " characters.
std::string Lexer::Stringify(const std::string &Str, bool Charify) { std::string Lexer::Stringify(const std::string &Str, bool Charify) {
@ -223,7 +262,8 @@ SourceLocation Lexer::getSourceLocation(const char *Loc) const {
if (FileLoc.isFileID()) if (FileLoc.isFileID())
return SourceLocation::getFileLoc(FileLoc.getFileID(), CharNo); return SourceLocation::getFileLoc(FileLoc.getFileID(), CharNo);
return GetMappedTokenLoc(PP, FileLoc, CharNo); assert(PP && "This doesn't work on raw lexers");
return GetMappedTokenLoc(*PP, FileLoc, CharNo);
} }
/// Diag - Forwarding function for diagnostics. This translate a source /// Diag - Forwarding function for diagnostics. This translate a source
@ -232,13 +272,13 @@ void Lexer::Diag(const char *Loc, unsigned DiagID,
const std::string &Msg) const { const std::string &Msg) const {
if (LexingRawMode && Diagnostic::isNoteWarningOrExtension(DiagID)) if (LexingRawMode && Diagnostic::isNoteWarningOrExtension(DiagID))
return; return;
PP.Diag(getSourceLocation(Loc), DiagID, Msg); PP->Diag(getSourceLocation(Loc), DiagID, Msg);
} }
void Lexer::Diag(SourceLocation Loc, unsigned DiagID, void Lexer::Diag(SourceLocation Loc, unsigned DiagID,
const std::string &Msg) const { const std::string &Msg) const {
if (LexingRawMode && Diagnostic::isNoteWarningOrExtension(DiagID)) if (LexingRawMode && Diagnostic::isNoteWarningOrExtension(DiagID))
return; return;
PP.Diag(Loc, DiagID, Msg); PP->Diag(Loc, DiagID, Msg);
} }
@ -446,11 +486,11 @@ FinishIdentifier:
// Fill in Result.IdentifierInfo, looking up the identifier in the // Fill in Result.IdentifierInfo, looking up the identifier in the
// identifier table. // identifier table.
PP.LookUpIdentifierInfo(Result, IdStart); PP->LookUpIdentifierInfo(Result, IdStart);
// Finally, now that we know we have an identifier, pass this off to the // Finally, now that we know we have an identifier, pass this off to the
// preprocessor, which may macro expand it or something. // preprocessor, which may macro expand it or something.
return PP.HandleIdentifier(Result); return PP->HandleIdentifier(Result);
} }
// Otherwise, $,\,? in identifier found. Enter slower path. // Otherwise, $,\,? in identifier found. Enter slower path.
@ -758,13 +798,13 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
// If this BCPL-style comment is in a macro definition, transmogrify it into // If this BCPL-style comment is in a macro definition, transmogrify it into
// a C-style block comment. // a C-style block comment.
if (ParsingPreprocessorDirective) { if (ParsingPreprocessorDirective) {
std::string Spelling = PP.getSpelling(Result); std::string Spelling = PP->getSpelling(Result);
assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?"); assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
Spelling[1] = '*'; // Change prefix to "/*". Spelling[1] = '*'; // Change prefix to "/*".
Spelling += "*/"; // add suffix. Spelling += "*/"; // add suffix.
Result.setLocation(PP.CreateString(&Spelling[0], Spelling.size(), Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(),
Result.getLocation())); Result.getLocation()));
Result.setLength(Spelling.size()); Result.setLength(Spelling.size());
} }
return false; return false;
@ -1038,7 +1078,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
FormTokenWithChars(Result, CurPtr); FormTokenWithChars(Result, CurPtr);
// Restore comment saving mode, in case it was disabled for directive. // Restore comment saving mode, in case it was disabled for directive.
KeepCommentMode = PP.getCommentRetentionState(); KeepCommentMode = PP->getCommentRetentionState();
return true; // Have a token. return true; // Have a token.
} }
@ -1067,7 +1107,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
BufferPtr = CurPtr; BufferPtr = CurPtr;
// Finally, let the preprocessor handle this. // Finally, let the preprocessor handle this.
return PP.HandleEndOfFile(Result); return PP->HandleEndOfFile(Result);
} }
/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from /// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
@ -1136,10 +1176,11 @@ LexNextToken:
if (CurPtr-1 == BufferEnd) { if (CurPtr-1 == BufferEnd) {
// Read the PP instance variable into an automatic variable, because // Read the PP instance variable into an automatic variable, because
// LexEndOfFile will often delete 'this'. // LexEndOfFile will often delete 'this'.
Preprocessor &PPCache = PP; Preprocessor *PPCache = PP;
if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file. if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file.
return; // Got a token to return. return; // Got a token to return.
return PPCache.Lex(Result); assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
return PPCache->Lex(Result);
} }
Diag(CurPtr-1, diag::null_in_file); Diag(CurPtr-1, diag::null_in_file);
@ -1155,7 +1196,7 @@ LexNextToken:
ParsingPreprocessorDirective = false; ParsingPreprocessorDirective = false;
// Restore comment saving mode, in case it was disabled for directive. // Restore comment saving mode, in case it was disabled for directive.
KeepCommentMode = PP.getCommentRetentionState(); KeepCommentMode = PP->getCommentRetentionState();
// Since we consumed a newline, we are back at the start of a line. // Since we consumed a newline, we are back at the start of a line.
IsAtStartOfLine = true; IsAtStartOfLine = true;
@ -1396,11 +1437,11 @@ LexNextToken:
// FIXME: -fpreprocessed mode?? // FIXME: -fpreprocessed mode??
if (Result.isAtStartOfLine() && !LexingRawMode) { if (Result.isAtStartOfLine() && !LexingRawMode) {
BufferPtr = CurPtr; BufferPtr = CurPtr;
PP.HandleDirective(Result); PP->HandleDirective(Result);
// As an optimization, if the preprocessor didn't switch lexers, tail // As an optimization, if the preprocessor didn't switch lexers, tail
// recurse. // recurse.
if (PP.isCurrentLexer(this)) { if (PP->isCurrentLexer(this)) {
// Start a new token. If this is a #include or something, the PP may // Start a new token. If this is a #include or something, the PP may
// want us starting at the beginning of the line again. If so, set // want us starting at the beginning of the line again. If so, set
// the StartOfLine flag. // the StartOfLine flag.
@ -1411,7 +1452,7 @@ LexNextToken:
goto LexNextToken; // GCC isn't tail call eliminating. goto LexNextToken; // GCC isn't tail call eliminating.
} }
return PP.Lex(Result); return PP->Lex(Result);
} }
} }
} else { } else {
@ -1525,11 +1566,11 @@ LexNextToken:
// FIXME: -fpreprocessed mode?? // FIXME: -fpreprocessed mode??
if (Result.isAtStartOfLine() && !LexingRawMode) { if (Result.isAtStartOfLine() && !LexingRawMode) {
BufferPtr = CurPtr; BufferPtr = CurPtr;
PP.HandleDirective(Result); PP->HandleDirective(Result);
// As an optimization, if the preprocessor didn't switch lexers, tail // As an optimization, if the preprocessor didn't switch lexers, tail
// recurse. // recurse.
if (PP.isCurrentLexer(this)) { if (PP->isCurrentLexer(this)) {
// Start a new token. If this is a #include or something, the PP may // Start a new token. If this is a #include or something, the PP may
// want us starting at the beginning of the line again. If so, set // want us starting at the beginning of the line again. If so, set
// the StartOfLine flag. // the StartOfLine flag.
@ -1539,7 +1580,7 @@ LexNextToken:
} }
goto LexNextToken; // GCC isn't tail call eliminating. goto LexNextToken; // GCC isn't tail call eliminating.
} }
return PP.Lex(Result); return PP->Lex(Result);
} }
} }
break; break;

View File

@ -36,7 +36,7 @@ class Lexer {
const char *BufferStart; // Start of the buffer. const char *BufferStart; // Start of the buffer.
const char *BufferEnd; // End of the buffer. const char *BufferEnd; // End of the buffer.
SourceLocation FileLoc; // Location for start of file. SourceLocation FileLoc; // Location for start of file.
Preprocessor &PP; // Preprocessor object controlling lexing. Preprocessor *PP; // Preprocessor object controlling lexing.
LangOptions Features; // Features enabled by this language (cache). LangOptions Features; // Features enabled by this language (cache).
bool Is_PragmaLexer; // True if lexer for _Pragma handling. bool Is_PragmaLexer; // True if lexer for _Pragma handling.
@ -62,6 +62,8 @@ class Lexer {
/// 4. All diagnostic messages are disabled, except for unterminated /*. /// 4. All diagnostic messages are disabled, except for unterminated /*.
/// 5. The only callback made into the preprocessor is to report a hard error /// 5. The only callback made into the preprocessor is to report a hard error
/// on an unterminated '/*' comment. /// on an unterminated '/*' comment.
///
/// Note that in raw mode the PP pointer may be null.
bool LexingRawMode; bool LexingRawMode;
/// KeepCommentMode - The lexer can optionally keep C & BCPL-style comments, /// KeepCommentMode - The lexer can optionally keep C & BCPL-style comments,
@ -96,11 +98,18 @@ public:
/// Lexer constructor - Create a new lexer object for the specified buffer /// Lexer constructor - Create a new lexer object for the specified buffer
/// with the specified preprocessor managing the lexing process. This lexer /// with the specified preprocessor managing the lexing process. This lexer
/// assumes that the associated MemoryBuffer and Preprocessor objects will /// assumes that the associated file buffer and Preprocessor objects will
/// outlive it, so it doesn't take ownership of either of them. /// outlive it, so it doesn't take ownership of either of them.
Lexer(SourceLocation FileLoc, Preprocessor &PP, Lexer(SourceLocation FileLoc, Preprocessor &PP,
const char *BufStart = 0, const char *BufEnd = 0); const char *BufStart = 0, const char *BufEnd = 0);
/// Lexer constructor - Create a new raw lexer object, i.e. one that is
/// constructed from language options rather than from a Preprocessor. This
/// object is only suitable for calls to 'LexRawToken'. This lexer assumes
/// that the associated file buffer will outlive it, so it doesn't take
/// ownership of it.
Lexer(SourceLocation FileLoc, const LangOptions &Features,
const char *BufStart, const char *BufEnd);
/// getFeatures - Return the language features currently enabled. NOTE: this /// getFeatures - Return the language features currently enabled. NOTE: this
/// lexer modifies features as a file is parsed! /// lexer modifies features as a file is parsed!
const LangOptions &getFeatures() const { return Features; } const LangOptions &getFeatures() const { return Features; }
@ -138,7 +147,7 @@ public:
assert(!LexingRawMode && "Already in raw mode!"); assert(!LexingRawMode && "Already in raw mode!");
LexingRawMode = true; LexingRawMode = true;
Lex(Result); Lex(Result);
LexingRawMode = false; LexingRawMode = PP == 0;
// Note that lexing to the end of the buffer doesn't implicitly delete the // Note that lexing to the end of the buffer doesn't implicitly delete the
// lexer when in raw mode. // lexer when in raw mode.
return BufferPtr == BufferEnd; return BufferPtr == BufferEnd;