[pseudo] Define a clangPseudoCLI library.
- define a common data structure Language which is a compiled result of the bnf grammar. It is defined in Language.h; - create a clangPseudoCLI lib which defines a grammar commandline flag and exposes a function to get the Language. It supports --grammar=cxx, --grammar=/path/to/file.bnf; - use the clangPseudoCLI in clang-pseudo, fuzzer, and benchmark tools (simplify the code and use the prebuilt cxx grammar); Split out from https://reviews.llvm.org/D127448. Differential Revision: https://reviews.llvm.org/D128679
This commit is contained in:
parent
39377d5227
commit
fe66aebd75
|
@ -25,6 +25,7 @@
|
||||||
#include "clang-pseudo/Forest.h"
|
#include "clang-pseudo/Forest.h"
|
||||||
#include "clang-pseudo/GLR.h"
|
#include "clang-pseudo/GLR.h"
|
||||||
#include "clang-pseudo/Token.h"
|
#include "clang-pseudo/Token.h"
|
||||||
|
#include "clang-pseudo/cli/CLI.h"
|
||||||
#include "clang-pseudo/grammar/Grammar.h"
|
#include "clang-pseudo/grammar/Grammar.h"
|
||||||
#include "clang-pseudo/grammar/LRTable.h"
|
#include "clang-pseudo/grammar/LRTable.h"
|
||||||
#include "clang/Basic/LangOptions.h"
|
#include "clang/Basic/LangOptions.h"
|
||||||
|
@ -39,9 +40,6 @@ using llvm::cl::desc;
|
||||||
using llvm::cl::opt;
|
using llvm::cl::opt;
|
||||||
using llvm::cl::Required;
|
using llvm::cl::Required;
|
||||||
|
|
||||||
static opt<std::string> GrammarFile("grammar",
|
|
||||||
desc("Parse and check a BNF grammar file."),
|
|
||||||
Required);
|
|
||||||
static opt<std::string> Source("source", desc("Source file"), Required);
|
static opt<std::string> Source("source", desc("Source file"), Required);
|
||||||
|
|
||||||
namespace clang {
|
namespace clang {
|
||||||
|
@ -49,11 +47,10 @@ namespace pseudo {
|
||||||
namespace bench {
|
namespace bench {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const std::string *GrammarText = nullptr;
|
|
||||||
const std::string *SourceText = nullptr;
|
const std::string *SourceText = nullptr;
|
||||||
const Grammar *G = nullptr;
|
const Language *Lang = nullptr;
|
||||||
|
|
||||||
void setupGrammarAndSource() {
|
void setup() {
|
||||||
auto ReadFile = [](llvm::StringRef FilePath) -> std::string {
|
auto ReadFile = [](llvm::StringRef FilePath) -> std::string {
|
||||||
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
|
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
|
||||||
llvm::MemoryBuffer::getFile(FilePath);
|
llvm::MemoryBuffer::getFile(FilePath);
|
||||||
|
@ -64,22 +61,13 @@ void setupGrammarAndSource() {
|
||||||
}
|
}
|
||||||
return GrammarText.get()->getBuffer().str();
|
return GrammarText.get()->getBuffer().str();
|
||||||
};
|
};
|
||||||
GrammarText = new std::string(ReadFile(GrammarFile));
|
|
||||||
SourceText = new std::string(ReadFile(Source));
|
SourceText = new std::string(ReadFile(Source));
|
||||||
std::vector<std::string> Diags;
|
Lang = &getLanguageFromFlags();
|
||||||
G = new Grammar(Grammar::parseBNF(*GrammarText, Diags));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void parseBNF(benchmark::State &State) {
|
|
||||||
std::vector<std::string> Diags;
|
|
||||||
for (auto _ : State)
|
|
||||||
Grammar::parseBNF(*GrammarText, Diags);
|
|
||||||
}
|
|
||||||
BENCHMARK(parseBNF);
|
|
||||||
|
|
||||||
static void buildSLR(benchmark::State &State) {
|
static void buildSLR(benchmark::State &State) {
|
||||||
for (auto _ : State)
|
for (auto _ : State)
|
||||||
LRTable::buildSLR(*G);
|
LRTable::buildSLR(Lang->G);
|
||||||
}
|
}
|
||||||
BENCHMARK(buildSLR);
|
BENCHMARK(buildSLR);
|
||||||
|
|
||||||
|
@ -129,13 +117,13 @@ static void preprocess(benchmark::State &State) {
|
||||||
BENCHMARK(preprocess);
|
BENCHMARK(preprocess);
|
||||||
|
|
||||||
static void glrParse(benchmark::State &State) {
|
static void glrParse(benchmark::State &State) {
|
||||||
LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
|
SymbolID StartSymbol = *Lang->G->findNonterminal("translation-unit");
|
||||||
SymbolID StartSymbol = *G->findNonterminal("translation-unit");
|
|
||||||
TokenStream Stream = lexAndPreprocess();
|
TokenStream Stream = lexAndPreprocess();
|
||||||
for (auto _ : State) {
|
for (auto _ : State) {
|
||||||
pseudo::ForestArena Forest;
|
pseudo::ForestArena Forest;
|
||||||
pseudo::GSS GSS;
|
pseudo::GSS GSS;
|
||||||
pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
|
pseudo::glrParse(Stream, ParseParams{*Lang->G, Lang->Table, Forest, GSS},
|
||||||
|
StartSymbol);
|
||||||
}
|
}
|
||||||
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
|
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
|
||||||
SourceText->size());
|
SourceText->size());
|
||||||
|
@ -143,13 +131,13 @@ static void glrParse(benchmark::State &State) {
|
||||||
BENCHMARK(glrParse);
|
BENCHMARK(glrParse);
|
||||||
|
|
||||||
static void full(benchmark::State &State) {
|
static void full(benchmark::State &State) {
|
||||||
LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
|
SymbolID StartSymbol = *Lang->G.findNonterminal("translation-unit");
|
||||||
SymbolID StartSymbol = *G->findNonterminal("translation-unit");
|
|
||||||
for (auto _ : State) {
|
for (auto _ : State) {
|
||||||
TokenStream Stream = lexAndPreprocess();
|
TokenStream Stream = lexAndPreprocess();
|
||||||
pseudo::ForestArena Forest;
|
pseudo::ForestArena Forest;
|
||||||
pseudo::GSS GSS;
|
pseudo::GSS GSS;
|
||||||
pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS},
|
pseudo::glrParse(lexAndPreprocess(),
|
||||||
|
ParseParams{Lang->G, Lang->Table, Forest, GSS},
|
||||||
StartSymbol);
|
StartSymbol);
|
||||||
}
|
}
|
||||||
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
|
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
|
||||||
|
@ -165,7 +153,7 @@ BENCHMARK(full);
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
benchmark::Initialize(&argc, argv);
|
benchmark::Initialize(&argc, argv);
|
||||||
llvm::cl::ParseCommandLineOptions(argc, argv);
|
llvm::cl::ParseCommandLineOptions(argc, argv);
|
||||||
clang::pseudo::bench::setupGrammarAndSource();
|
clang::pseudo::bench::setup();
|
||||||
benchmark::RunSpecifiedBenchmarks();
|
benchmark::RunSpecifiedBenchmarks();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ add_benchmark(ClangPseudoBenchmark Benchmark.cpp)
|
||||||
target_link_libraries(ClangPseudoBenchmark
|
target_link_libraries(ClangPseudoBenchmark
|
||||||
PRIVATE
|
PRIVATE
|
||||||
clangPseudo
|
clangPseudo
|
||||||
|
clangPseudoCLI
|
||||||
clangPseudoGrammar
|
clangPseudoGrammar
|
||||||
LLVMSupport
|
LLVMSupport
|
||||||
)
|
)
|
||||||
|
|
|
@ -11,5 +11,6 @@ add_llvm_fuzzer(clang-pseudo-fuzzer
|
||||||
target_link_libraries(clang-pseudo-fuzzer
|
target_link_libraries(clang-pseudo-fuzzer
|
||||||
PRIVATE
|
PRIVATE
|
||||||
clangPseudo
|
clangPseudo
|
||||||
|
clangPseudoCLI
|
||||||
clangPseudoGrammar
|
clangPseudoGrammar
|
||||||
)
|
)
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include "clang-pseudo/Forest.h"
|
#include "clang-pseudo/Forest.h"
|
||||||
#include "clang-pseudo/GLR.h"
|
#include "clang-pseudo/GLR.h"
|
||||||
#include "clang-pseudo/Token.h"
|
#include "clang-pseudo/Token.h"
|
||||||
|
#include "clang-pseudo/cli/CLI.h"
|
||||||
#include "clang-pseudo/grammar/Grammar.h"
|
#include "clang-pseudo/grammar/Grammar.h"
|
||||||
#include "clang-pseudo/grammar/LRTable.h"
|
#include "clang-pseudo/grammar/LRTable.h"
|
||||||
#include "clang/Basic/LangOptions.h"
|
#include "clang/Basic/LangOptions.h"
|
||||||
|
@ -24,28 +25,10 @@ namespace {
|
||||||
|
|
||||||
class Fuzzer {
|
class Fuzzer {
|
||||||
clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
|
clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
|
||||||
Grammar G;
|
|
||||||
LRTable T;
|
|
||||||
bool Print;
|
bool Print;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {
|
Fuzzer(bool Print) : Print(Print) {}
|
||||||
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
|
|
||||||
llvm::MemoryBuffer::getFile(GrammarPath);
|
|
||||||
if (std::error_code EC = GrammarText.getError()) {
|
|
||||||
llvm::errs() << "Error: can't read grammar file '" << GrammarPath
|
|
||||||
<< "': " << EC.message() << "\n";
|
|
||||||
std::exit(1);
|
|
||||||
}
|
|
||||||
std::vector<std::string> Diags;
|
|
||||||
G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
|
|
||||||
if (!Diags.empty()) {
|
|
||||||
for (const auto &Diag : Diags)
|
|
||||||
llvm::errs() << Diag << "\n";
|
|
||||||
std::exit(1);
|
|
||||||
}
|
|
||||||
T = LRTable::buildSLR(G);
|
|
||||||
}
|
|
||||||
|
|
||||||
void operator()(llvm::StringRef Code) {
|
void operator()(llvm::StringRef Code) {
|
||||||
std::string CodeStr = Code.str(); // Must be null-terminated.
|
std::string CodeStr = Code.str(); // Must be null-terminated.
|
||||||
|
@ -58,11 +41,13 @@ public:
|
||||||
|
|
||||||
clang::pseudo::ForestArena Arena;
|
clang::pseudo::ForestArena Arena;
|
||||||
clang::pseudo::GSS GSS;
|
clang::pseudo::GSS GSS;
|
||||||
|
const Language &Lang = getLanguageFromFlags();
|
||||||
auto &Root =
|
auto &Root =
|
||||||
glrParse(ParseableStream, clang::pseudo::ParseParams{G, T, Arena, GSS},
|
glrParse(ParseableStream,
|
||||||
*G.findNonterminal("translation-unit"));
|
clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
|
||||||
|
*Lang.G.findNonterminal("translation-unit"));
|
||||||
if (Print)
|
if (Print)
|
||||||
llvm::outs() << Root.dumpRecursive(G);
|
llvm::outs() << Root.dumpRecursive(Lang.G);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -75,16 +60,11 @@ Fuzzer *Fuzz = nullptr;
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
||||||
// Set up the fuzzer from command line flags:
|
// Set up the fuzzer from command line flags:
|
||||||
// -grammar=<file> (required) - path to cxx.bnf
|
|
||||||
// -print - used for testing the fuzzer
|
// -print - used for testing the fuzzer
|
||||||
int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
|
int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
|
||||||
llvm::StringRef GrammarFile;
|
|
||||||
bool PrintForest = false;
|
bool PrintForest = false;
|
||||||
auto ConsumeArg = [&](llvm::StringRef Arg) -> bool {
|
auto ConsumeArg = [&](llvm::StringRef Arg) -> bool {
|
||||||
if (Arg.consume_front("-grammar=")) {
|
if (Arg == "-print") {
|
||||||
GrammarFile = Arg;
|
|
||||||
return true;
|
|
||||||
} else if (Arg == "-print") {
|
|
||||||
PrintForest = true;
|
PrintForest = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -92,11 +72,7 @@ int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
|
||||||
};
|
};
|
||||||
*Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv;
|
*Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv;
|
||||||
|
|
||||||
if (GrammarFile.empty()) {
|
clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(PrintForest);
|
||||||
fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -112,6 +112,7 @@ private:
|
||||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
|
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
|
||||||
|
|
||||||
// Parameters for the GLR parsing.
|
// Parameters for the GLR parsing.
|
||||||
|
// FIXME: refine it with the ParseLang struct.
|
||||||
struct ParseParams {
|
struct ParseParams {
|
||||||
// The grammar of the language we're going to parse.
|
// The grammar of the language we're going to parse.
|
||||||
const Grammar &G;
|
const Grammar &G;
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
//===--- Language.h -------------------------------------------- -*- C++-*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
|
||||||
|
#define CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
|
||||||
|
|
||||||
|
#include "clang-pseudo/grammar/Grammar.h"
|
||||||
|
#include "clang-pseudo/grammar/LRTable.h"
|
||||||
|
|
||||||
|
namespace clang {
|
||||||
|
namespace pseudo {
|
||||||
|
|
||||||
|
// Specify a language that can be parsed by the pseduoparser.
|
||||||
|
struct Language {
|
||||||
|
Grammar G;
|
||||||
|
LRTable Table;
|
||||||
|
|
||||||
|
// FIXME: add clang::LangOptions.
|
||||||
|
// FIXME: add default start symbols.
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace pseudo
|
||||||
|
} // namespace clang
|
||||||
|
|
||||||
|
#endif // CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
|
|
@ -0,0 +1,31 @@
|
||||||
|
//===--- ParseLang.h ------------------------------------------- -*- C++-*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef CLANG_PSEUDO_PARSELANG_H
|
||||||
|
#define CLANG_PSEUDO_PARSELANG_H
|
||||||
|
|
||||||
|
#include "clang-pseudo/grammar/Grammar.h"
|
||||||
|
#include "clang-pseudo/grammar/LRTable.h"
|
||||||
|
|
||||||
|
namespace clang {
|
||||||
|
namespace pseudo {
|
||||||
|
|
||||||
|
// Specify a language that can be parsed by the pseduoparser.
|
||||||
|
// Manifest generated from a bnf grammar file.
|
||||||
|
struct ParseLang {
|
||||||
|
Grammar G;
|
||||||
|
LRTable Table;
|
||||||
|
|
||||||
|
// FIXME: add clang::LangOptions.
|
||||||
|
// FIXME: add default start symbols.
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace pseudo
|
||||||
|
} // namespace clang
|
||||||
|
|
||||||
|
#endif // CLANG_PSEUDO_PARSELANG_H
|
|
@ -0,0 +1,35 @@
|
||||||
|
//===--- CLI.h - Get grammar from variant sources ----------------*- C++-*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// Provides the Grammar, LRTable etc for a language specified by the `--grammar`
|
||||||
|
// flags. It is by design to be used by pseudoparser-based CLI tools.
|
||||||
|
//
|
||||||
|
// The CLI library defines a `--grammar` CLI flag, which supports 1) using a
|
||||||
|
// grammar from a file (--grammar=/path/to/lang.bnf) or using the prebuilt cxx
|
||||||
|
// language (--grammar=cxx).
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef CLANG_PSEUDO_CLI_CLI_H
|
||||||
|
#define CLANG_PSEUDO_CLI_CLI_H
|
||||||
|
|
||||||
|
#include "clang-pseudo/Language.h"
|
||||||
|
|
||||||
|
namespace clang {
|
||||||
|
namespace pseudo {
|
||||||
|
|
||||||
|
// Returns the corresponding Language from the '--grammar' command-line flag.
|
||||||
|
//
|
||||||
|
// !! If the grammar flag is invalid (e.g. unexisting file), this function will
|
||||||
|
// exit the program immediately.
|
||||||
|
const Language &getLanguageFromFlags();
|
||||||
|
|
||||||
|
} // namespace pseudo
|
||||||
|
} // namespace clang
|
||||||
|
|
||||||
|
#endif // CLANG_PSEUDO_CLI_CLI_H
|
|
@ -23,12 +23,11 @@
|
||||||
#ifndef CLANG_PSEUDO_CXX_CXX_H
|
#ifndef CLANG_PSEUDO_CXX_CXX_H
|
||||||
#define CLANG_PSEUDO_CXX_CXX_H
|
#define CLANG_PSEUDO_CXX_CXX_H
|
||||||
|
|
||||||
|
#include "clang-pseudo/Language.h"
|
||||||
#include "clang-pseudo/grammar/Grammar.h"
|
#include "clang-pseudo/grammar/Grammar.h"
|
||||||
|
|
||||||
namespace clang {
|
namespace clang {
|
||||||
namespace pseudo {
|
namespace pseudo {
|
||||||
class LRTable;
|
|
||||||
|
|
||||||
namespace cxx {
|
namespace cxx {
|
||||||
// Symbol represents nonterminal symbols in the C++ grammar.
|
// Symbol represents nonterminal symbols in the C++ grammar.
|
||||||
// It provides a simple uniform way to access a particular nonterminal.
|
// It provides a simple uniform way to access a particular nonterminal.
|
||||||
|
@ -38,10 +37,8 @@ enum class Symbol : SymbolID {
|
||||||
#undef NONTERMINAL
|
#undef NONTERMINAL
|
||||||
};
|
};
|
||||||
|
|
||||||
// Returns the C++ grammar.
|
// Returns the Language for the cxx.bnf grammar.
|
||||||
const Grammar &getGrammar();
|
const Language &getLanguage();
|
||||||
// Returns the corresponding LRTable for the C++ grammar.
|
|
||||||
const LRTable &getLRTable();
|
|
||||||
|
|
||||||
} // namespace cxx
|
} // namespace cxx
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,7 @@
|
||||||
#include "llvm/ADT/DenseSet.h"
|
#include "llvm/ADT/DenseSet.h"
|
||||||
#include "llvm/ADT/Optional.h"
|
#include "llvm/ADT/Optional.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
add_subdirectory(cli)
|
||||||
add_subdirectory(cxx)
|
add_subdirectory(cxx)
|
||||||
add_subdirectory(grammar)
|
add_subdirectory(grammar)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
//===--- CLI.cpp - ----------------------------------------------*- C++-*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "clang-pseudo/cli/CLI.h"
|
||||||
|
#include "clang-pseudo/cxx/CXX.h"
|
||||||
|
#include "llvm/Support/CommandLine.h"
|
||||||
|
#include "llvm/Support/ErrorOr.h"
|
||||||
|
#include "llvm/Support/MemoryBuffer.h"
|
||||||
|
|
||||||
|
static llvm::cl::opt<std::string> Grammar(
|
||||||
|
"grammar",
|
||||||
|
llvm::cl::desc(
|
||||||
|
"Specify a BNF grammar file path, or a builtin language (cxx)."),
|
||||||
|
llvm::cl::init("cxx"));
|
||||||
|
|
||||||
|
namespace clang {
|
||||||
|
namespace pseudo {
|
||||||
|
|
||||||
|
const Language &getLanguageFromFlags() {
|
||||||
|
if (::Grammar == "cxx")
|
||||||
|
return cxx::getLanguage();
|
||||||
|
|
||||||
|
static Language *Lang = []() {
|
||||||
|
// Read from a bnf grammar file.
|
||||||
|
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
|
||||||
|
llvm::MemoryBuffer::getFile(::Grammar);
|
||||||
|
if (std::error_code EC = GrammarText.getError()) {
|
||||||
|
llvm::errs() << "Error: can't read grammar file '" << ::Grammar
|
||||||
|
<< "': " << EC.message() << "\n";
|
||||||
|
std::exit(1);
|
||||||
|
}
|
||||||
|
std::vector<std::string> Diags;
|
||||||
|
auto G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
|
||||||
|
for (const auto &Diag : Diags)
|
||||||
|
llvm::errs() << Diag << "\n";
|
||||||
|
auto Table = LRTable::buildSLR(G);
|
||||||
|
return new Language{std::move(G), std::move(Table)};
|
||||||
|
}();
|
||||||
|
return *Lang;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace pseudo
|
||||||
|
} // namespace clang
|
|
@ -0,0 +1,11 @@
|
||||||
|
set(LLVM_LINK_COMPONENTS
|
||||||
|
Support
|
||||||
|
)
|
||||||
|
|
||||||
|
add_clang_library(clangPseudoCLI
|
||||||
|
CLI.cpp
|
||||||
|
|
||||||
|
LINK_LIBS
|
||||||
|
clangPseudoGrammar
|
||||||
|
clangPseudoCXX
|
||||||
|
)
|
|
@ -7,26 +7,33 @@
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "clang-pseudo/cxx/CXX.h"
|
#include "clang-pseudo/cxx/CXX.h"
|
||||||
|
#include "clang-pseudo/Language.h"
|
||||||
|
#include "clang-pseudo/grammar/Grammar.h"
|
||||||
#include "clang-pseudo/grammar/LRTable.h"
|
#include "clang-pseudo/grammar/LRTable.h"
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
namespace clang {
|
namespace clang {
|
||||||
namespace pseudo {
|
namespace pseudo {
|
||||||
namespace cxx {
|
namespace cxx {
|
||||||
|
namespace {
|
||||||
static const char *CXXBNF =
|
static const char *CXXBNF =
|
||||||
#include "CXXBNF.inc"
|
#include "CXXBNF.inc"
|
||||||
;
|
;
|
||||||
|
} // namespace
|
||||||
|
|
||||||
const Grammar &getGrammar() {
|
const Language &getLanguage() {
|
||||||
static std::vector<std::string> Diags;
|
static const auto &CXXLanguage = []() -> const Language & {
|
||||||
static Grammar *G = new Grammar(Grammar::parseBNF(CXXBNF, Diags));
|
std::vector<std::string> Diags;
|
||||||
|
auto G = Grammar::parseBNF(CXXBNF, Diags);
|
||||||
assert(Diags.empty());
|
assert(Diags.empty());
|
||||||
return *G;
|
LRTable Table = LRTable::buildSLR(G);
|
||||||
}
|
const Language *PL = new Language{
|
||||||
|
std::move(G),
|
||||||
const LRTable &getLRTable() {
|
std::move(Table),
|
||||||
static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));
|
};
|
||||||
return *Table;
|
return *PL;
|
||||||
|
}();
|
||||||
|
return CXXLanguage;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cxx
|
} // namespace cxx
|
||||||
|
|
|
@ -13,5 +13,6 @@ target_link_libraries(clang-pseudo
|
||||||
PRIVATE
|
PRIVATE
|
||||||
clangPseudo
|
clangPseudo
|
||||||
clangPseudoGrammar
|
clangPseudoGrammar
|
||||||
|
clangPseudoCLI
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,9 @@
|
||||||
#include "clang-pseudo/Bracket.h"
|
#include "clang-pseudo/Bracket.h"
|
||||||
#include "clang-pseudo/DirectiveTree.h"
|
#include "clang-pseudo/DirectiveTree.h"
|
||||||
#include "clang-pseudo/GLR.h"
|
#include "clang-pseudo/GLR.h"
|
||||||
|
#include "clang-pseudo/Language.h"
|
||||||
#include "clang-pseudo/Token.h"
|
#include "clang-pseudo/Token.h"
|
||||||
|
#include "clang-pseudo/cli/CLI.h"
|
||||||
#include "clang-pseudo/grammar/Grammar.h"
|
#include "clang-pseudo/grammar/Grammar.h"
|
||||||
#include "clang-pseudo/grammar/LRGraph.h"
|
#include "clang-pseudo/grammar/LRGraph.h"
|
||||||
#include "clang-pseudo/grammar/LRTable.h"
|
#include "clang-pseudo/grammar/LRTable.h"
|
||||||
|
@ -22,14 +24,11 @@
|
||||||
#include "llvm/Support/MemoryBuffer.h"
|
#include "llvm/Support/MemoryBuffer.h"
|
||||||
#include "llvm/Support/Signals.h"
|
#include "llvm/Support/Signals.h"
|
||||||
|
|
||||||
using clang::pseudo::Grammar;
|
|
||||||
using clang::pseudo::TokenStream;
|
using clang::pseudo::TokenStream;
|
||||||
using llvm::cl::desc;
|
using llvm::cl::desc;
|
||||||
using llvm::cl::init;
|
using llvm::cl::init;
|
||||||
using llvm::cl::opt;
|
using llvm::cl::opt;
|
||||||
|
|
||||||
static opt<std::string>
|
|
||||||
Grammar("grammar", desc("Parse and check a BNF grammar file."), init(""));
|
|
||||||
static opt<bool> PrintGrammar("print-grammar", desc("Print the grammar."));
|
static opt<bool> PrintGrammar("print-grammar", desc("Print the grammar."));
|
||||||
static opt<bool> PrintGraph("print-graph",
|
static opt<bool> PrintGraph("print-graph",
|
||||||
desc("Print the LR graph for the grammar"));
|
desc("Print the LR graph for the grammar"));
|
||||||
|
@ -123,42 +122,51 @@ int main(int argc, char *argv[]) {
|
||||||
pairBrackets(*ParseableStream);
|
pairBrackets(*ParseableStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Grammar.getNumOccurrences()) {
|
const auto &Lang = clang::pseudo::getLanguageFromFlags();
|
||||||
std::string Text = readOrDie(Grammar);
|
|
||||||
std::vector<std::string> Diags;
|
|
||||||
auto G = Grammar::parseBNF(Text, Diags);
|
|
||||||
|
|
||||||
if (!Diags.empty()) {
|
|
||||||
llvm::errs() << llvm::join(Diags, "\n");
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n",
|
|
||||||
Grammar);
|
|
||||||
if (PrintGrammar)
|
if (PrintGrammar)
|
||||||
llvm::outs() << G.dump();
|
llvm::outs() << Lang.G.dump();
|
||||||
if (PrintGraph)
|
if (PrintGraph)
|
||||||
llvm::outs() << clang::pseudo::LRGraph::buildLR0(G).dumpForTests(G);
|
llvm::outs() << clang::pseudo::LRGraph::buildLR0(Lang.G).dumpForTests(
|
||||||
auto LRTable = clang::pseudo::LRTable::buildSLR(G);
|
Lang.G);
|
||||||
|
|
||||||
if (PrintTable)
|
if (PrintTable)
|
||||||
llvm::outs() << LRTable.dumpForTests(G);
|
llvm::outs() << Lang.Table.dumpForTests(Lang.G);
|
||||||
if (PrintStatistics)
|
if (PrintStatistics)
|
||||||
llvm::outs() << LRTable.dumpStatistics();
|
llvm::outs() << Lang.Table.dumpStatistics();
|
||||||
|
|
||||||
if (ParseableStream) {
|
if (ParseableStream) {
|
||||||
clang::pseudo::ForestArena Arena;
|
clang::pseudo::ForestArena Arena;
|
||||||
clang::pseudo::GSS GSS;
|
clang::pseudo::GSS GSS;
|
||||||
llvm::Optional<clang::pseudo::SymbolID> StartSymID =
|
llvm::Optional<clang::pseudo::SymbolID> StartSymID =
|
||||||
G.findNonterminal(StartSymbol);
|
Lang.G.findNonterminal(StartSymbol);
|
||||||
if (!StartSymID) {
|
if (!StartSymID) {
|
||||||
llvm::errs() << llvm::formatv(
|
llvm::errs() << llvm::formatv(
|
||||||
"The start symbol {0} doesn't exit in the grammar!\n", Grammar);
|
"The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
auto &Root = glrParse(*ParseableStream,
|
auto &Root =
|
||||||
clang::pseudo::ParseParams{G, LRTable, Arena, GSS},
|
glrParse(*ParseableStream,
|
||||||
|
clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
|
||||||
*StartSymID);
|
*StartSymID);
|
||||||
if (PrintForest)
|
if (PrintForest)
|
||||||
llvm::outs() << Root.dumpRecursive(G, /*Abbreviated=*/true);
|
llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
|
||||||
|
|
||||||
|
if (ParseableStream) {
|
||||||
|
clang::pseudo::ForestArena Arena;
|
||||||
|
clang::pseudo::GSS GSS;
|
||||||
|
llvm::Optional<clang::pseudo::SymbolID> StartSymID =
|
||||||
|
Lang.G.findNonterminal(StartSymbol);
|
||||||
|
if (!StartSymID) {
|
||||||
|
llvm::errs() << llvm::formatv(
|
||||||
|
"The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
auto &Root =
|
||||||
|
glrParse(*ParseableStream,
|
||||||
|
clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
|
||||||
|
*StartSymID);
|
||||||
|
if (PrintForest)
|
||||||
|
llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
|
||||||
|
|
||||||
if (PrintStatistics) {
|
if (PrintStatistics) {
|
||||||
llvm::outs() << "Forest bytes: " << Arena.bytes()
|
llvm::outs() << "Forest bytes: " << Arena.bytes()
|
||||||
|
@ -174,7 +182,7 @@ int main(int argc, char *argv[]) {
|
||||||
llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
|
llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
|
||||||
for (const auto &S : Stats.BySymbol)
|
for (const auto &S : Stats.BySymbol)
|
||||||
llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second,
|
llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second,
|
||||||
G.symbolName(S.first));
|
Lang.G.symbolName(S.first));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
|
|
||||||
#include "clang-pseudo/GLR.h"
|
#include "clang-pseudo/GLR.h"
|
||||||
#include "clang-pseudo/Token.h"
|
#include "clang-pseudo/Token.h"
|
||||||
|
#include "clang-pseudo/Language.h"
|
||||||
#include "clang-pseudo/grammar/Grammar.h"
|
#include "clang-pseudo/grammar/Grammar.h"
|
||||||
#include "clang/Basic/LangOptions.h"
|
#include "clang/Basic/LangOptions.h"
|
||||||
#include "clang/Basic/TokenKinds.h"
|
#include "clang/Basic/TokenKinds.h"
|
||||||
|
@ -48,7 +49,13 @@ class GLRTest : public ::testing::Test {
|
||||||
public:
|
public:
|
||||||
void build(llvm::StringRef GrammarBNF) {
|
void build(llvm::StringRef GrammarBNF) {
|
||||||
std::vector<std::string> Diags;
|
std::vector<std::string> Diags;
|
||||||
G = Grammar::parseBNF(GrammarBNF, Diags);
|
TestLang.G = Grammar::parseBNF(GrammarBNF, Diags);
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenStream emptyTokenStream() {
|
||||||
|
TokenStream Empty;
|
||||||
|
Empty.finalize();
|
||||||
|
return Empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildGrammar(std::vector<std::string> Nonterminals,
|
void buildGrammar(std::vector<std::string> Nonterminals,
|
||||||
|
@ -66,19 +73,22 @@ public:
|
||||||
|
|
||||||
SymbolID id(llvm::StringRef Name) const {
|
SymbolID id(llvm::StringRef Name) const {
|
||||||
for (unsigned I = 0; I < NumTerminals; ++I)
|
for (unsigned I = 0; I < NumTerminals; ++I)
|
||||||
if (G.table().Terminals[I] == Name)
|
if (TestLang.G.table().Terminals[I] == Name)
|
||||||
return tokenSymbol(static_cast<tok::TokenKind>(I));
|
return tokenSymbol(static_cast<tok::TokenKind>(I));
|
||||||
for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID)
|
for (SymbolID ID = 0; ID < TestLang.G.table().Nonterminals.size(); ++ID)
|
||||||
if (G.table().Nonterminals[ID].Name == Name)
|
if (TestLang.G.table().Nonterminals[ID].Name == Name)
|
||||||
return ID;
|
return ID;
|
||||||
ADD_FAILURE() << "No such symbol found: " << Name;
|
ADD_FAILURE() << "No such symbol found: " << Name;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
RuleID ruleFor(llvm::StringRef NonterminalName) const {
|
RuleID ruleFor(llvm::StringRef NonterminalName) const {
|
||||||
auto RuleRange = G.table().Nonterminals[id(NonterminalName)].RuleRange;
|
auto RuleRange =
|
||||||
|
TestLang.G.table().Nonterminals[id(NonterminalName)].RuleRange;
|
||||||
if (RuleRange.End - RuleRange.Start == 1)
|
if (RuleRange.End - RuleRange.Start == 1)
|
||||||
return G.table().Nonterminals[id(NonterminalName)].RuleRange.Start;
|
return TestLang.G.table()
|
||||||
|
.Nonterminals[id(NonterminalName)]
|
||||||
|
.RuleRange.Start;
|
||||||
ADD_FAILURE() << "Expected a single rule for " << NonterminalName
|
ADD_FAILURE() << "Expected a single rule for " << NonterminalName
|
||||||
<< ", but it has " << RuleRange.End - RuleRange.Start
|
<< ", but it has " << RuleRange.End - RuleRange.Start
|
||||||
<< " rule!\n";
|
<< " rule!\n";
|
||||||
|
@ -86,7 +96,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Grammar G;
|
Language TestLang;
|
||||||
ForestArena Arena;
|
ForestArena Arena;
|
||||||
GSS GSStack;
|
GSS GSStack;
|
||||||
};
|
};
|
||||||
|
@ -112,9 +122,8 @@ TEST_F(GLRTest, ShiftMergingHeads) {
|
||||||
/*Parents=*/{GSSNode0});
|
/*Parents=*/{GSSNode0});
|
||||||
|
|
||||||
buildGrammar({}, {}); // Create a fake empty grammar.
|
buildGrammar({}, {}); // Create a fake empty grammar.
|
||||||
LRTable T =
|
TestLang.Table =
|
||||||
LRTable::buildForTests(G, /*Entries=*/
|
LRTable::buildForTests(TestLang.G, /*Entries=*/{
|
||||||
{
|
|
||||||
{1, tokenSymbol(tok::semi), Action::shift(4)},
|
{1, tokenSymbol(tok::semi), Action::shift(4)},
|
||||||
{2, tokenSymbol(tok::semi), Action::shift(4)},
|
{2, tokenSymbol(tok::semi), Action::shift(4)},
|
||||||
{3, tokenSymbol(tok::semi), Action::shift(5)},
|
{3, tokenSymbol(tok::semi), Action::shift(5)},
|
||||||
|
@ -123,8 +132,8 @@ TEST_F(GLRTest, ShiftMergingHeads) {
|
||||||
|
|
||||||
ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
|
ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
|
||||||
std::vector<const GSS::Node *> NewHeads;
|
std::vector<const GSS::Node *> NewHeads;
|
||||||
glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal, {G, T, Arena, GSStack},
|
glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal,
|
||||||
NewHeads);
|
{TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads);
|
||||||
|
|
||||||
EXPECT_THAT(NewHeads,
|
EXPECT_THAT(NewHeads,
|
||||||
UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
|
UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
|
||||||
|
@ -144,8 +153,8 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
|
||||||
buildGrammar({"class-name", "enum-name"},
|
buildGrammar({"class-name", "enum-name"},
|
||||||
{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
|
{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
|
||||||
|
|
||||||
LRTable Table = LRTable::buildForTests(
|
TestLang.Table = LRTable::buildForTests(
|
||||||
G,
|
TestLang.G,
|
||||||
{
|
{
|
||||||
{/*State=*/0, id("class-name"), Action::goTo(2)},
|
{/*State=*/0, id("class-name"), Action::goTo(2)},
|
||||||
{/*State=*/0, id("enum-name"), Action::goTo(3)},
|
{/*State=*/0, id("enum-name"), Action::goTo(3)},
|
||||||
|
@ -161,7 +170,8 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
|
||||||
GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
|
GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
|
||||||
|
|
||||||
std::vector<const GSS::Node *> Heads = {GSSNode1};
|
std::vector<const GSS::Node *> Heads = {GSSNode1};
|
||||||
glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
|
glrReduce(Heads, tokenSymbol(tok::eof),
|
||||||
|
{TestLang.G, TestLang.Table, Arena, GSStack});
|
||||||
EXPECT_THAT(Heads, UnorderedElementsAre(
|
EXPECT_THAT(Heads, UnorderedElementsAre(
|
||||||
GSSNode1,
|
GSSNode1,
|
||||||
AllOf(state(2), parsedSymbolID(id("class-name")),
|
AllOf(state(2), parsedSymbolID(id("class-name")),
|
||||||
|
@ -192,8 +202,8 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
|
||||||
/*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
|
/*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
|
||||||
/*Parents=*/{GSSNode2, GSSNode3});
|
/*Parents=*/{GSSNode2, GSSNode3});
|
||||||
|
|
||||||
LRTable Table = LRTable::buildForTests(
|
TestLang.Table = LRTable::buildForTests(
|
||||||
G,
|
TestLang.G,
|
||||||
{
|
{
|
||||||
{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
|
{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
|
||||||
{/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
|
{/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
|
||||||
|
@ -202,7 +212,7 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
|
||||||
{/*State=*/4, ruleFor("ptr-operator")},
|
{/*State=*/4, ruleFor("ptr-operator")},
|
||||||
});
|
});
|
||||||
std::vector<const GSS::Node *> Heads = {GSSNode4};
|
std::vector<const GSS::Node *> Heads = {GSSNode4};
|
||||||
glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
|
glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
|
||||||
|
|
||||||
EXPECT_THAT(Heads, UnorderedElementsAre(
|
EXPECT_THAT(Heads, UnorderedElementsAre(
|
||||||
GSSNode4,
|
GSSNode4,
|
||||||
|
@ -246,8 +256,8 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
|
||||||
/*Parents=*/{GSSNode2});
|
/*Parents=*/{GSSNode2});
|
||||||
|
|
||||||
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
|
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
|
||||||
LRTable Table = LRTable::buildForTests(
|
TestLang.Table = LRTable::buildForTests(
|
||||||
G,
|
TestLang.G,
|
||||||
{
|
{
|
||||||
{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
|
{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
|
||||||
{/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
|
{/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
|
||||||
|
@ -257,7 +267,7 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
|
||||||
{/*State=*/4, /* type-name := enum-name */ 1},
|
{/*State=*/4, /* type-name := enum-name */ 1},
|
||||||
});
|
});
|
||||||
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
|
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
|
||||||
glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
|
glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
|
||||||
|
|
||||||
// Verify that the stack heads are joint at state 5 after reduces.
|
// Verify that the stack heads are joint at state 5 after reduces.
|
||||||
EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
|
EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
|
||||||
|
@ -266,7 +276,7 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
|
||||||
parents({GSSNode1, GSSNode2}))))
|
parents({GSSNode1, GSSNode2}))))
|
||||||
<< Heads;
|
<< Heads;
|
||||||
// Verify that we create an ambiguous ForestNode of two parses of `type-name`.
|
// Verify that we create an ambiguous ForestNode of two parses of `type-name`.
|
||||||
EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G),
|
EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G),
|
||||||
"[ 1, end) type-name := <ambiguous>\n"
|
"[ 1, end) type-name := <ambiguous>\n"
|
||||||
"[ 1, end) ├─type-name := class-name\n"
|
"[ 1, end) ├─type-name := class-name\n"
|
||||||
"[ 1, end) │ └─class-name := <opaque>\n"
|
"[ 1, end) │ └─class-name := <opaque>\n"
|
||||||
|
@ -304,8 +314,8 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
|
||||||
/*Parents=*/{GSSNode2});
|
/*Parents=*/{GSSNode2});
|
||||||
|
|
||||||
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
|
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
|
||||||
LRTable Table =
|
TestLang.Table =
|
||||||
LRTable::buildForTests(G,
|
LRTable::buildForTests(TestLang.G,
|
||||||
{
|
{
|
||||||
{/*State=*/0, id("pointer"), Action::goTo(5)},
|
{/*State=*/0, id("pointer"), Action::goTo(5)},
|
||||||
},
|
},
|
||||||
|
@ -314,14 +324,15 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
|
||||||
{4, /* pointer := enum-name */ 1},
|
{4, /* pointer := enum-name */ 1},
|
||||||
});
|
});
|
||||||
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
|
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
|
||||||
glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
|
glrReduce(Heads, tokenSymbol(tok::eof),
|
||||||
|
{TestLang.G, TestLang.Table, Arena, GSStack});
|
||||||
|
|
||||||
EXPECT_THAT(
|
EXPECT_THAT(
|
||||||
Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
|
Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
|
||||||
AllOf(state(5), parsedSymbolID(id("pointer")),
|
AllOf(state(5), parsedSymbolID(id("pointer")),
|
||||||
parents({GSSNode0}))))
|
parents({GSSNode0}))))
|
||||||
<< Heads;
|
<< Heads;
|
||||||
EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G),
|
EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G),
|
||||||
"[ 0, end) pointer := <ambiguous>\n"
|
"[ 0, end) pointer := <ambiguous>\n"
|
||||||
"[ 0, end) ├─pointer := class-name *\n"
|
"[ 0, end) ├─pointer := class-name *\n"
|
||||||
"[ 0, 1) │ ├─class-name := <opaque>\n"
|
"[ 0, 1) │ ├─class-name := <opaque>\n"
|
||||||
|
@ -334,8 +345,8 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
|
||||||
TEST_F(GLRTest, ReduceLookahead) {
|
TEST_F(GLRTest, ReduceLookahead) {
|
||||||
// A term can be followed by +, but not by -.
|
// A term can be followed by +, but not by -.
|
||||||
buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"});
|
buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"});
|
||||||
LRTable Table =
|
TestLang.Table =
|
||||||
LRTable::buildForTests(G,
|
LRTable::buildForTests(TestLang.G,
|
||||||
{
|
{
|
||||||
{/*State=*/0, id("term"), Action::goTo(2)},
|
{/*State=*/0, id("term"), Action::goTo(2)},
|
||||||
},
|
},
|
||||||
|
@ -352,14 +363,14 @@ TEST_F(GLRTest, ReduceLookahead) {
|
||||||
|
|
||||||
// When the lookahead is +, reduce is performed.
|
// When the lookahead is +, reduce is performed.
|
||||||
std::vector<const GSS::Node *> Heads = {GSSNode1};
|
std::vector<const GSS::Node *> Heads = {GSSNode1};
|
||||||
glrReduce(Heads, tokenSymbol(tok::plus), {G, Table, Arena, GSStack});
|
glrReduce(Heads, tokenSymbol(tok::plus), {TestLang.G, TestLang.Table, Arena, GSStack});
|
||||||
EXPECT_THAT(Heads,
|
EXPECT_THAT(Heads,
|
||||||
ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")),
|
ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")),
|
||||||
parents(Root))));
|
parents(Root))));
|
||||||
|
|
||||||
// When the lookahead is -, reduce is not performed.
|
// When the lookahead is -, reduce is not performed.
|
||||||
Heads = {GSSNode1};
|
Heads = {GSSNode1};
|
||||||
glrReduce(Heads, tokenSymbol(tok::minus), {G, Table, Arena, GSStack});
|
glrReduce(Heads, tokenSymbol(tok::minus), {TestLang.G, TestLang.Table, Arena, GSStack});
|
||||||
EXPECT_THAT(Heads, ElementsAre(GSSNode1));
|
EXPECT_THAT(Heads, ElementsAre(GSSNode1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -380,15 +391,16 @@ TEST_F(GLRTest, PerfectForestNodeSharing) {
|
||||||
left-paren := {
|
left-paren := {
|
||||||
expr := IDENTIFIER
|
expr := IDENTIFIER
|
||||||
)bnf");
|
)bnf");
|
||||||
|
TestLang.Table = LRTable::buildSLR(TestLang.G);
|
||||||
clang::LangOptions LOptions;
|
clang::LangOptions LOptions;
|
||||||
const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
|
const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
|
||||||
auto LRTable = LRTable::buildSLR(G);
|
|
||||||
|
|
||||||
const ForestNode &Parsed =
|
const ForestNode &Parsed =
|
||||||
glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
|
glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
|
||||||
// Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
|
// Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
|
||||||
// in the forest, see the `#1` and `=#1` in the dump string.
|
// in the forest, see the `#1` and `=#1` in the dump string.
|
||||||
EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := <ambiguous>\n"
|
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
|
||||||
|
"[ 0, end) test := <ambiguous>\n"
|
||||||
"[ 0, end) ├─test := { expr\n"
|
"[ 0, end) ├─test := { expr\n"
|
||||||
"[ 0, 1) │ ├─{ := tok[0]\n"
|
"[ 0, 1) │ ├─{ := tok[0]\n"
|
||||||
"[ 1, end) │ └─expr := IDENTIFIER #1\n"
|
"[ 1, end) │ └─expr := IDENTIFIER #1\n"
|
||||||
|
@ -418,11 +430,12 @@ TEST_F(GLRTest, GLRReduceOrder) {
|
||||||
)bnf");
|
)bnf");
|
||||||
clang::LangOptions LOptions;
|
clang::LangOptions LOptions;
|
||||||
const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
|
const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
|
||||||
auto LRTable = LRTable::buildSLR(G);
|
TestLang.Table = LRTable::buildSLR(TestLang.G);
|
||||||
|
|
||||||
const ForestNode &Parsed =
|
const ForestNode &Parsed =
|
||||||
glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
|
glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
|
||||||
EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := <ambiguous>\n"
|
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
|
||||||
|
"[ 0, end) test := <ambiguous>\n"
|
||||||
"[ 0, end) ├─test := IDENTIFIER\n"
|
"[ 0, end) ├─test := IDENTIFIER\n"
|
||||||
"[ 0, end) │ └─IDENTIFIER := tok[0]\n"
|
"[ 0, end) │ └─IDENTIFIER := tok[0]\n"
|
||||||
"[ 0, end) └─test := foo\n"
|
"[ 0, end) └─test := foo\n"
|
||||||
|
@ -442,11 +455,12 @@ TEST_F(GLRTest, NoExplicitAccept) {
|
||||||
// of the nonterminal `test` when the next token is `eof`, verify that the
|
// of the nonterminal `test` when the next token is `eof`, verify that the
|
||||||
// parser stops at the right state.
|
// parser stops at the right state.
|
||||||
const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
|
const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
|
||||||
auto LRTable = LRTable::buildSLR(G);
|
TestLang.Table = LRTable::buildSLR(TestLang.G);
|
||||||
|
|
||||||
const ForestNode &Parsed =
|
const ForestNode &Parsed =
|
||||||
glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
|
glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
|
||||||
EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := IDENTIFIER test\n"
|
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
|
||||||
|
"[ 0, end) test := IDENTIFIER test\n"
|
||||||
"[ 0, 1) ├─IDENTIFIER := tok[0]\n"
|
"[ 0, 1) ├─IDENTIFIER := tok[0]\n"
|
||||||
"[ 1, end) └─test := IDENTIFIER\n"
|
"[ 1, end) └─test := IDENTIFIER\n"
|
||||||
"[ 1, end) └─IDENTIFIER := tok[1]\n");
|
"[ 1, end) └─IDENTIFIER := tok[1]\n");
|
||||||
|
|
Loading…
Reference in New Issue