[pseudo] Define a clangPseudoCLI library.

- define a common data structure, Language, which is the compiled result of a
  BNF grammar. It is defined in Language.h;
- create a clangPseudoCLI lib which defines a --grammar command-line flag and
  exposes a function to get the Language. It supports --grammar=cxx and
  --grammar=/path/to/file.bnf;
- use clangPseudoCLI in the clang-pseudo, fuzzer, and benchmark tools (this
  simplifies the code and uses the prebuilt cxx grammar);

Split out from https://reviews.llvm.org/D127448.

Differential Revision: https://reviews.llvm.org/D128679
This commit is contained in:
Haojian Wu 2022-06-28 22:37:03 +02:00
parent 39377d5227
commit fe66aebd75
17 changed files with 310 additions and 159 deletions

View File

@ -25,6 +25,7 @@
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/GLR.h"
#include "clang-pseudo/Token.h"
#include "clang-pseudo/cli/CLI.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/LangOptions.h"
@ -39,9 +40,6 @@ using llvm::cl::desc;
using llvm::cl::opt;
using llvm::cl::Required;
static opt<std::string> GrammarFile("grammar",
desc("Parse and check a BNF grammar file."),
Required);
static opt<std::string> Source("source", desc("Source file"), Required);
namespace clang {
@ -49,11 +47,10 @@ namespace pseudo {
namespace bench {
namespace {
const std::string *GrammarText = nullptr;
const std::string *SourceText = nullptr;
const Grammar *G = nullptr;
const Language *Lang = nullptr;
void setupGrammarAndSource() {
void setup() {
auto ReadFile = [](llvm::StringRef FilePath) -> std::string {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
llvm::MemoryBuffer::getFile(FilePath);
@ -64,22 +61,13 @@ void setupGrammarAndSource() {
}
return GrammarText.get()->getBuffer().str();
};
GrammarText = new std::string(ReadFile(GrammarFile));
SourceText = new std::string(ReadFile(Source));
std::vector<std::string> Diags;
G = new Grammar(Grammar::parseBNF(*GrammarText, Diags));
Lang = &getLanguageFromFlags();
}
static void parseBNF(benchmark::State &State) {
std::vector<std::string> Diags;
for (auto _ : State)
Grammar::parseBNF(*GrammarText, Diags);
}
BENCHMARK(parseBNF);
static void buildSLR(benchmark::State &State) {
for (auto _ : State)
LRTable::buildSLR(*G);
LRTable::buildSLR(Lang->G);
}
BENCHMARK(buildSLR);
@ -129,13 +117,13 @@ static void preprocess(benchmark::State &State) {
BENCHMARK(preprocess);
static void glrParse(benchmark::State &State) {
LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
SymbolID StartSymbol = *G->findNonterminal("translation-unit");
// Language::G is a Grammar held by value, so it is accessed with '.'.
SymbolID StartSymbol = *Lang->G.findNonterminal("translation-unit");
TokenStream Stream = lexAndPreprocess();
for (auto _ : State) {
pseudo::ForestArena Forest;
pseudo::GSS GSS;
pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
// ParseParams binds the grammar by reference; Lang->G is already a Grammar
// value, so it is passed directly rather than dereferenced.
pseudo::glrParse(Stream, ParseParams{Lang->G, Lang->Table, Forest, GSS},
StartSymbol);
}
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
SourceText->size());
@ -143,13 +131,13 @@ static void glrParse(benchmark::State &State) {
BENCHMARK(glrParse);
static void full(benchmark::State &State) {
LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
SymbolID StartSymbol = *G->findNonterminal("translation-unit");
SymbolID StartSymbol = *Lang->G.findNonterminal("translation-unit");
for (auto _ : State) {
TokenStream Stream = lexAndPreprocess();
pseudo::ForestArena Forest;
pseudo::GSS GSS;
pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS},
pseudo::glrParse(lexAndPreprocess(),
ParseParams{Lang->G, Lang->Table, Forest, GSS},
StartSymbol);
}
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
@ -165,7 +153,7 @@ BENCHMARK(full);
int main(int argc, char *argv[]) {
benchmark::Initialize(&argc, argv);
llvm::cl::ParseCommandLineOptions(argc, argv);
clang::pseudo::bench::setupGrammarAndSource();
clang::pseudo::bench::setup();
benchmark::RunSpecifiedBenchmarks();
return 0;
}

View File

@ -3,6 +3,7 @@ add_benchmark(ClangPseudoBenchmark Benchmark.cpp)
target_link_libraries(ClangPseudoBenchmark
PRIVATE
clangPseudo
clangPseudoCLI
clangPseudoGrammar
LLVMSupport
)

View File

@ -11,5 +11,6 @@ add_llvm_fuzzer(clang-pseudo-fuzzer
target_link_libraries(clang-pseudo-fuzzer
PRIVATE
clangPseudo
clangPseudoCLI
clangPseudoGrammar
)

View File

@ -10,6 +10,7 @@
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/GLR.h"
#include "clang-pseudo/Token.h"
#include "clang-pseudo/cli/CLI.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/LangOptions.h"
@ -24,28 +25,10 @@ namespace {
class Fuzzer {
clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
Grammar G;
LRTable T;
bool Print;
public:
Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
llvm::MemoryBuffer::getFile(GrammarPath);
if (std::error_code EC = GrammarText.getError()) {
llvm::errs() << "Error: can't read grammar file '" << GrammarPath
<< "': " << EC.message() << "\n";
std::exit(1);
}
std::vector<std::string> Diags;
G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
if (!Diags.empty()) {
for (const auto &Diag : Diags)
llvm::errs() << Diag << "\n";
std::exit(1);
}
T = LRTable::buildSLR(G);
}
Fuzzer(bool Print) : Print(Print) {}
void operator()(llvm::StringRef Code) {
std::string CodeStr = Code.str(); // Must be null-terminated.
@ -58,11 +41,13 @@ public:
clang::pseudo::ForestArena Arena;
clang::pseudo::GSS GSS;
const Language &Lang = getLanguageFromFlags();
auto &Root =
glrParse(ParseableStream, clang::pseudo::ParseParams{G, T, Arena, GSS},
*G.findNonterminal("translation-unit"));
glrParse(ParseableStream,
clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
*Lang.G.findNonterminal("translation-unit"));
if (Print)
llvm::outs() << Root.dumpRecursive(G);
llvm::outs() << Root.dumpRecursive(Lang.G);
}
};
@ -75,16 +60,11 @@ Fuzzer *Fuzz = nullptr;
extern "C" {
// Set up the fuzzer from command line flags:
// -grammar=<file> (required) - path to cxx.bnf
// -print - used for testing the fuzzer
int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
llvm::StringRef GrammarFile;
bool PrintForest = false;
auto ConsumeArg = [&](llvm::StringRef Arg) -> bool {
if (Arg.consume_front("-grammar=")) {
GrammarFile = Arg;
return true;
} else if (Arg == "-print") {
if (Arg == "-print") {
PrintForest = true;
return true;
}
@ -92,11 +72,7 @@ int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
};
*Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv;
if (GrammarFile.empty()) {
fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n");
exit(1);
}
clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest);
clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(PrintForest);
return 0;
}

View File

@ -112,6 +112,7 @@ private:
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
// Parameters for the GLR parsing.
// FIXME: refine it with the ParseLang struct.
struct ParseParams {
// The grammar of the language we're going to parse.
const Grammar &G;

View File

@ -0,0 +1,30 @@
//===--- Language.h -------------------------------------------- -*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
#define CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
namespace clang {
namespace pseudo {
// Specifies a language that can be parsed by the pseudoparser.
//
// Bundles a grammar together with the LR table used to parse it. The struct
// is brace-initialized as `Language{G, Table}` elsewhere, so the member order
// below is part of its interface.
struct Language {
// The grammar of the language.
Grammar G;
// The LR parsing table that goes with G.
LRTable Table;
// FIXME: add clang::LangOptions.
// FIXME: add default start symbols.
};
} // namespace pseudo
} // namespace clang
#endif // CLANG_PSEUDO_GRAMMAR_LANGUAGE_H

View File

@ -0,0 +1,31 @@
//===--- ParseLang.h ------------------------------------------- -*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_PSEUDO_PARSELANG_H
#define CLANG_PSEUDO_PARSELANG_H
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
namespace clang {
namespace pseudo {
// Specifies a language that can be parsed by the pseudoparser.
// Manifest generated from a bnf grammar file.
//
// NOTE(review): this struct has the same members as `Language` in Language.h;
// confirm whether both are intended to exist.
struct ParseLang {
// The grammar of the language.
Grammar G;
// The LR parsing table that goes with G.
LRTable Table;
// FIXME: add clang::LangOptions.
// FIXME: add default start symbols.
};
};
} // namespace pseudo
} // namespace clang
#endif // CLANG_PSEUDO_PARSELANG_H

View File

@ -0,0 +1,35 @@
//===--- CLI.h - Get grammar from variant sources ----------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides the Grammar, LRTable etc. for a language specified by the
// `--grammar` flag. It is designed to be used by pseudoparser-based CLI tools.
//
// The CLI library defines a `--grammar` CLI flag, which supports either
// 1) using a grammar from a file (--grammar=/path/to/lang.bnf), or
// 2) using the prebuilt cxx language (--grammar=cxx).
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_PSEUDO_CLI_CLI_H
#define CLANG_PSEUDO_CLI_CLI_H
#include "clang-pseudo/Language.h"
namespace clang {
namespace pseudo {
// Returns the Language selected by the '--grammar' command-line flag.
//
// !! If the flag names a grammar file that cannot be read (e.g. a nonexistent
// file), this function will exit the program immediately.
const Language &getLanguageFromFlags();
} // namespace pseudo
} // namespace clang
#endif // CLANG_PSEUDO_CLI_CLI_H

View File

@ -23,12 +23,11 @@
#ifndef CLANG_PSEUDO_CXX_CXX_H
#define CLANG_PSEUDO_CXX_CXX_H
#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
namespace clang {
namespace pseudo {
class LRTable;
namespace cxx {
// Symbol represents nonterminal symbols in the C++ grammar.
// It provides a simple uniform way to access a particular nonterminal.
@ -38,10 +37,8 @@ enum class Symbol : SymbolID {
#undef NONTERMINAL
};
// Returns the C++ grammar.
const Grammar &getGrammar();
// Returns the corresponding LRTable for the C++ grammar.
const LRTable &getLRTable();
// Returns the Language for the cxx.bnf grammar.
const Language &getLanguage();
} // namespace cxx

View File

@ -57,6 +57,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <vector>

View File

@ -1,3 +1,4 @@
add_subdirectory(cli)
add_subdirectory(cxx)
add_subdirectory(grammar)

View File

@ -0,0 +1,48 @@
//===--- CLI.cpp - ----------------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-pseudo/cli/CLI.h"
#include "clang-pseudo/cxx/CXX.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
// The `--grammar` flag: either a path to a BNF grammar file or the name of a
// builtin language. Defaults to "cxx".
//
// This variable shares its name with the clang::pseudo::Grammar class, so code
// inside that namespace refers to the flag explicitly as `::Grammar`.
static llvm::cl::opt<std::string> Grammar(
"grammar",
llvm::cl::desc(
"Specify a BNF grammar file path, or a builtin language (cxx)."),
llvm::cl::init("cxx"));
namespace clang {
namespace pseudo {
// Resolves the `--grammar` flag to a Language.
//
// The value "cxx" selects the prebuilt C++ language. Any other value is
// treated as the path of a BNF grammar file, which is read, parsed and turned
// into an SLR table the first time this function takes that path; the result
// is kept for the lifetime of the program. An unreadable file terminates the
// process with exit code 1. Grammar diagnostics are printed to stderr but do
// not stop the table from being built.
const Language &getLanguageFromFlags() {
  const bool UseBuiltinCXX = (::Grammar == "cxx");
  if (UseBuiltinCXX)
    return cxx::getLanguage();

  static Language *FileLanguage = []() {
    // Load the text of the user-supplied grammar file.
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileBuffer =
        llvm::MemoryBuffer::getFile(::Grammar);
    std::error_code ReadError = FileBuffer.getError();
    if (ReadError) {
      llvm::errs() << "Error: can't read grammar file '" << ::Grammar
                   << "': " << ReadError.message() << "\n";
      std::exit(1);
    }

    // Parse the grammar, reporting every diagnostic it produced.
    std::vector<std::string> Messages;
    auto ParsedGrammar =
        Grammar::parseBNF(FileBuffer->get()->getBuffer(), Messages);
    for (const auto &Message : Messages)
      llvm::errs() << Message << "\n";

    // The table is built before the grammar is moved into the Language.
    auto ParsedTable = LRTable::buildSLR(ParsedGrammar);
    return new Language{std::move(ParsedGrammar), std::move(ParsedTable)};
  }();
  return *FileLanguage;
}
} // namespace pseudo
} // namespace clang

View File

@ -0,0 +1,11 @@
# LLVM components this directory links against; CLI.cpp includes
# llvm/Support headers (CommandLine, ErrorOr, MemoryBuffer).
set(LLVM_LINK_COMPONENTS
Support
)
# Library built from CLI.cpp, which defines the shared `--grammar` flag and
# getLanguageFromFlags().
add_clang_library(clangPseudoCLI
CLI.cpp
LINK_LIBS
# Presumably these provide Grammar/LRTable and the prebuilt cxx language that
# CLI.cpp uses -- confirm against the grammar/ and cxx/ CMake files.
clangPseudoGrammar
clangPseudoCXX
)

View File

@ -7,26 +7,33 @@
//===----------------------------------------------------------------------===//
#include "clang-pseudo/cxx/CXX.h"
#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
#include <utility>
namespace clang {
namespace pseudo {
namespace cxx {
namespace {
static const char *CXXBNF =
#include "CXXBNF.inc"
;
} // namespace
const Grammar &getGrammar() {
static std::vector<std::string> Diags;
static Grammar *G = new Grammar(Grammar::parseBNF(CXXBNF, Diags));
assert(Diags.empty());
return *G;
}
const LRTable &getLRTable() {
static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));
return *Table;
const Language &getLanguage() {
static const auto &CXXLanguage = []() -> const Language & {
std::vector<std::string> Diags;
auto G = Grammar::parseBNF(CXXBNF, Diags);
assert(Diags.empty());
LRTable Table = LRTable::buildSLR(G);
const Language *PL = new Language{
std::move(G),
std::move(Table),
};
return *PL;
}();
return CXXLanguage;
}
} // namespace cxx

View File

@ -13,5 +13,6 @@ target_link_libraries(clang-pseudo
PRIVATE
clangPseudo
clangPseudoGrammar
clangPseudoCLI
)

View File

@ -9,7 +9,9 @@
#include "clang-pseudo/Bracket.h"
#include "clang-pseudo/DirectiveTree.h"
#include "clang-pseudo/GLR.h"
#include "clang-pseudo/Language.h"
#include "clang-pseudo/Token.h"
#include "clang-pseudo/cli/CLI.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRGraph.h"
#include "clang-pseudo/grammar/LRTable.h"
@ -22,14 +24,11 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
using clang::pseudo::Grammar;
using clang::pseudo::TokenStream;
using llvm::cl::desc;
using llvm::cl::init;
using llvm::cl::opt;
static opt<std::string>
Grammar("grammar", desc("Parse and check a BNF grammar file."), init(""));
static opt<bool> PrintGrammar("print-grammar", desc("Print the grammar."));
static opt<bool> PrintGraph("print-graph",
desc("Print the LR graph for the grammar"));
@ -123,42 +122,51 @@ int main(int argc, char *argv[]) {
pairBrackets(*ParseableStream);
}
if (Grammar.getNumOccurrences()) {
std::string Text = readOrDie(Grammar);
std::vector<std::string> Diags;
auto G = Grammar::parseBNF(Text, Diags);
const auto &Lang = clang::pseudo::getLanguageFromFlags();
if (PrintGrammar)
llvm::outs() << Lang.G.dump();
if (PrintGraph)
llvm::outs() << clang::pseudo::LRGraph::buildLR0(Lang.G).dumpForTests(
Lang.G);
if (!Diags.empty()) {
llvm::errs() << llvm::join(Diags, "\n");
if (PrintTable)
llvm::outs() << Lang.Table.dumpForTests(Lang.G);
if (PrintStatistics)
llvm::outs() << Lang.Table.dumpStatistics();
if (ParseableStream) {
clang::pseudo::ForestArena Arena;
clang::pseudo::GSS GSS;
llvm::Optional<clang::pseudo::SymbolID> StartSymID =
Lang.G.findNonterminal(StartSymbol);
if (!StartSymID) {
llvm::errs() << llvm::formatv(
"The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
return 2;
}
llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n",
Grammar);
if (PrintGrammar)
llvm::outs() << G.dump();
if (PrintGraph)
llvm::outs() << clang::pseudo::LRGraph::buildLR0(G).dumpForTests(G);
auto LRTable = clang::pseudo::LRTable::buildSLR(G);
if (PrintTable)
llvm::outs() << LRTable.dumpForTests(G);
if (PrintStatistics)
llvm::outs() << LRTable.dumpStatistics();
auto &Root =
glrParse(*ParseableStream,
clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
*StartSymID);
if (PrintForest)
llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
if (ParseableStream) {
clang::pseudo::ForestArena Arena;
clang::pseudo::GSS GSS;
llvm::Optional<clang::pseudo::SymbolID> StartSymID =
G.findNonterminal(StartSymbol);
Lang.G.findNonterminal(StartSymbol);
if (!StartSymID) {
llvm::errs() << llvm::formatv(
"The start symbol {0} doesn't exit in the grammar!\n", Grammar);
"The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
return 2;
}
auto &Root = glrParse(*ParseableStream,
clang::pseudo::ParseParams{G, LRTable, Arena, GSS},
*StartSymID);
auto &Root =
glrParse(*ParseableStream,
clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
*StartSymID);
if (PrintForest)
llvm::outs() << Root.dumpRecursive(G, /*Abbreviated=*/true);
llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
if (PrintStatistics) {
llvm::outs() << "Forest bytes: " << Arena.bytes()
@ -174,7 +182,7 @@ int main(int argc, char *argv[]) {
llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
for (const auto &S : Stats.BySymbol)
llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second,
G.symbolName(S.first));
Lang.G.symbolName(S.first));
}
}
}

View File

@ -8,6 +8,7 @@
#include "clang-pseudo/GLR.h"
#include "clang-pseudo/Token.h"
#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TokenKinds.h"
@ -48,9 +49,15 @@ class GLRTest : public ::testing::Test {
public:
void build(llvm::StringRef GrammarBNF) {
std::vector<std::string> Diags;
G = Grammar::parseBNF(GrammarBNF, Diags);
TestLang.G = Grammar::parseBNF(GrammarBNF, Diags);
}
TokenStream emptyTokenStream() {
TokenStream Empty;
Empty.finalize();
return Empty;
}
void buildGrammar(std::vector<std::string> Nonterminals,
std::vector<std::string> Rules) {
Nonterminals.push_back("_");
@ -66,19 +73,22 @@ public:
SymbolID id(llvm::StringRef Name) const {
for (unsigned I = 0; I < NumTerminals; ++I)
if (G.table().Terminals[I] == Name)
if (TestLang.G.table().Terminals[I] == Name)
return tokenSymbol(static_cast<tok::TokenKind>(I));
for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID)
if (G.table().Nonterminals[ID].Name == Name)
for (SymbolID ID = 0; ID < TestLang.G.table().Nonterminals.size(); ++ID)
if (TestLang.G.table().Nonterminals[ID].Name == Name)
return ID;
ADD_FAILURE() << "No such symbol found: " << Name;
return 0;
}
RuleID ruleFor(llvm::StringRef NonterminalName) const {
auto RuleRange = G.table().Nonterminals[id(NonterminalName)].RuleRange;
auto RuleRange =
TestLang.G.table().Nonterminals[id(NonterminalName)].RuleRange;
if (RuleRange.End - RuleRange.Start == 1)
return G.table().Nonterminals[id(NonterminalName)].RuleRange.Start;
return TestLang.G.table()
.Nonterminals[id(NonterminalName)]
.RuleRange.Start;
ADD_FAILURE() << "Expected a single rule for " << NonterminalName
<< ", but it has " << RuleRange.End - RuleRange.Start
<< " rule!\n";
@ -86,7 +96,7 @@ public:
}
protected:
Grammar G;
Language TestLang;
ForestArena Arena;
GSS GSStack;
};
@ -112,9 +122,8 @@ TEST_F(GLRTest, ShiftMergingHeads) {
/*Parents=*/{GSSNode0});
buildGrammar({}, {}); // Create a fake empty grammar.
LRTable T =
LRTable::buildForTests(G, /*Entries=*/
{
TestLang.Table =
LRTable::buildForTests(TestLang.G, /*Entries=*/{
{1, tokenSymbol(tok::semi), Action::shift(4)},
{2, tokenSymbol(tok::semi), Action::shift(4)},
{3, tokenSymbol(tok::semi), Action::shift(5)},
@ -123,8 +132,8 @@ TEST_F(GLRTest, ShiftMergingHeads) {
ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
std::vector<const GSS::Node *> NewHeads;
glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal, {G, T, Arena, GSStack},
NewHeads);
glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal,
{TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads);
EXPECT_THAT(NewHeads,
UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
@ -144,8 +153,8 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
buildGrammar({"class-name", "enum-name"},
{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
LRTable Table = LRTable::buildForTests(
G,
TestLang.Table = LRTable::buildForTests(
TestLang.G,
{
{/*State=*/0, id("class-name"), Action::goTo(2)},
{/*State=*/0, id("enum-name"), Action::goTo(3)},
@ -161,7 +170,8 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
std::vector<const GSS::Node *> Heads = {GSSNode1};
glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
glrReduce(Heads, tokenSymbol(tok::eof),
{TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(Heads, UnorderedElementsAre(
GSSNode1,
AllOf(state(2), parsedSymbolID(id("class-name")),
@ -192,8 +202,8 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
/*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
/*Parents=*/{GSSNode2, GSSNode3});
LRTable Table = LRTable::buildForTests(
G,
TestLang.Table = LRTable::buildForTests(
TestLang.G,
{
{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
{/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
@ -202,7 +212,7 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
{/*State=*/4, ruleFor("ptr-operator")},
});
std::vector<const GSS::Node *> Heads = {GSSNode4};
glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(Heads, UnorderedElementsAre(
GSSNode4,
@ -246,8 +256,8 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
/*Parents=*/{GSSNode2});
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
LRTable Table = LRTable::buildForTests(
G,
TestLang.Table = LRTable::buildForTests(
TestLang.G,
{
{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
{/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
@ -257,7 +267,7 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
{/*State=*/4, /* type-name := enum-name */ 1},
});
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
// Verify that the stack heads are joint at state 5 after reduces.
EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@ -266,7 +276,7 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
parents({GSSNode1, GSSNode2}))))
<< Heads;
// Verify that we create an ambiguous ForestNode of two parses of `type-name`.
EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G),
EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G),
"[ 1, end) type-name := <ambiguous>\n"
"[ 1, end) ├─type-name := class-name\n"
"[ 1, end) │ └─class-name := <opaque>\n"
@ -304,8 +314,8 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
/*Parents=*/{GSSNode2});
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
LRTable Table =
LRTable::buildForTests(G,
TestLang.Table =
LRTable::buildForTests(TestLang.G,
{
{/*State=*/0, id("pointer"), Action::goTo(5)},
},
@ -314,14 +324,15 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
{4, /* pointer := enum-name */ 1},
});
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
glrReduce(Heads, tokenSymbol(tok::eof),
{TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(
Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
AllOf(state(5), parsedSymbolID(id("pointer")),
parents({GSSNode0}))))
<< Heads;
EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G),
EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G),
"[ 0, end) pointer := <ambiguous>\n"
"[ 0, end) ├─pointer := class-name *\n"
"[ 0, 1) │ ├─class-name := <opaque>\n"
@ -334,8 +345,8 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
TEST_F(GLRTest, ReduceLookahead) {
// A term can be followed by +, but not by -.
buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"});
LRTable Table =
LRTable::buildForTests(G,
TestLang.Table =
LRTable::buildForTests(TestLang.G,
{
{/*State=*/0, id("term"), Action::goTo(2)},
},
@ -352,14 +363,14 @@ TEST_F(GLRTest, ReduceLookahead) {
// When the lookahead is +, reduce is performed.
std::vector<const GSS::Node *> Heads = {GSSNode1};
glrReduce(Heads, tokenSymbol(tok::plus), {G, Table, Arena, GSStack});
glrReduce(Heads, tokenSymbol(tok::plus), {TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(Heads,
ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")),
parents(Root))));
// When the lookahead is -, reduce is not performed.
Heads = {GSSNode1};
glrReduce(Heads, tokenSymbol(tok::minus), {G, Table, Arena, GSStack});
glrReduce(Heads, tokenSymbol(tok::minus), {TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(Heads, ElementsAre(GSSNode1));
}
@ -380,26 +391,27 @@ TEST_F(GLRTest, PerfectForestNodeSharing) {
left-paren := {
expr := IDENTIFIER
)bnf");
TestLang.Table = LRTable::buildSLR(TestLang.G);
clang::LangOptions LOptions;
const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
auto LRTable = LRTable::buildSLR(G);
const ForestNode &Parsed =
glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
// Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
// in the forest, see the `#1` and `=#1` in the dump string.
EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := <ambiguous>\n"
"[ 0, end) ├─test := { expr\n"
"[ 0, 1) │ ├─{ := tok[0]\n"
"[ 1, end) │ └─expr := IDENTIFIER #1\n"
"[ 1, end) │ └─IDENTIFIER := tok[1]\n"
"[ 0, end) ├─test := { IDENTIFIER\n"
"[ 0, 1) │ ├─{ := tok[0]\n"
"[ 1, end) │ └─IDENTIFIER := tok[1]\n"
"[ 0, end) └─test := left-paren expr\n"
"[ 0, 1) ├─left-paren := {\n"
"[ 0, 1) │ └─{ := tok[0]\n"
"[ 1, end) └─expr =#1\n");
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
"[ 0, end) test := <ambiguous>\n"
"[ 0, end) ├─test := { expr\n"
"[ 0, 1) │ ├─{ := tok[0]\n"
"[ 1, end) │ └─expr := IDENTIFIER #1\n"
"[ 1, end) │ └─IDENTIFIER := tok[1]\n"
"[ 0, end) ├─test := { IDENTIFIER\n"
"[ 0, 1) │ ├─{ := tok[0]\n"
"[ 1, end) │ └─IDENTIFIER := tok[1]\n"
"[ 0, end) └─test := left-paren expr\n"
"[ 0, 1) ├─left-paren := {\n"
"[ 0, 1) │ └─{ := tok[0]\n"
"[ 1, end) └─expr =#1\n");
}
TEST_F(GLRTest, GLRReduceOrder) {
@ -418,16 +430,17 @@ TEST_F(GLRTest, GLRReduceOrder) {
)bnf");
clang::LangOptions LOptions;
const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
auto LRTable = LRTable::buildSLR(G);
TestLang.Table = LRTable::buildSLR(TestLang.G);
const ForestNode &Parsed =
glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := <ambiguous>\n"
"[ 0, end) ├─test := IDENTIFIER\n"
"[ 0, end) │ └─IDENTIFIER := tok[0]\n"
"[ 0, end) └─test := foo\n"
"[ 0, end) └─foo := IDENTIFIER\n"
"[ 0, end) └─IDENTIFIER := tok[0]\n");
glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
"[ 0, end) test := <ambiguous>\n"
"[ 0, end) ├─test := IDENTIFIER\n"
"[ 0, end) │ └─IDENTIFIER := tok[0]\n"
"[ 0, end) └─test := foo\n"
"[ 0, end) └─foo := IDENTIFIER\n"
"[ 0, end) └─IDENTIFIER := tok[0]\n");
}
TEST_F(GLRTest, NoExplicitAccept) {
@ -442,14 +455,15 @@ TEST_F(GLRTest, NoExplicitAccept) {
// of the nonterminal `test` when the next token is `eof`, verify that the
// parser stops at the right state.
const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
auto LRTable = LRTable::buildSLR(G);
TestLang.Table = LRTable::buildSLR(TestLang.G);
const ForestNode &Parsed =
glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := IDENTIFIER test\n"
"[ 0, 1) ├─IDENTIFIER := tok[0]\n"
"[ 1, end) └─test := IDENTIFIER\n"
"[ 1, end) └─IDENTIFIER := tok[1]\n");
glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
"[ 0, end) test := IDENTIFIER test\n"
"[ 0, 1) ├─IDENTIFIER := tok[0]\n"
"[ 1, end) └─test := IDENTIFIER\n"
"[ 1, end) └─IDENTIFIER := tok[1]\n");
}
TEST(GSSTest, GC) {