//===--- GrammarTest.cpp - grammar tests -----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang-pseudo/grammar/Grammar.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include namespace clang { namespace pseudo { namespace { using testing::AllOf; using testing::ElementsAre; using testing::IsEmpty; using testing::Pair; using testing::UnorderedElementsAre; MATCHER_P(TargetID, SID, "") { return arg.Target == SID; } template testing::Matcher Sequence(T... IDs) { return testing::Property(&Rule::seq, ElementsAre(IDs...)); } class GrammarTest : public ::testing::Test { public: void build(llvm::StringRef BNF) { Diags.clear(); G = Grammar::parseBNF(BNF, Diags); } SymbolID id(llvm::StringRef Name) const { for (unsigned I = 0; I < NumTerminals; ++I) if (G.table().Terminals[I] == Name) return tokenSymbol(static_cast(I)); for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID) if (G.table().Nonterminals[ID].Name == Name) return ID; ADD_FAILURE() << "No such symbol found: " << Name; return 0; } RuleID ruleFor(llvm::StringRef NonterminalName) const { auto RuleRange = G.table().Nonterminals[id(NonterminalName)].RuleRange; if (RuleRange.End - RuleRange.Start == 1) return G.table().Nonterminals[id(NonterminalName)].RuleRange.Start; ADD_FAILURE() << "Expected a single rule for " << NonterminalName << ", but it has " << RuleRange.End - RuleRange.Start << " rule!\n"; return 0; } protected: Grammar G; std::vector Diags; }; TEST_F(GrammarTest, Basic) { build("_ := IDENTIFIER + _ # comment"); EXPECT_THAT(Diags, IsEmpty()); auto ExpectedRule = AllOf(TargetID(id("_")), Sequence(id("IDENTIFIER"), id("+"), id("_"))); EXPECT_EQ(G.symbolName(id("_")), "_"); EXPECT_THAT(G.rulesFor(id("_")), UnorderedElementsAre(ExpectedRule)); const auto &Rule = G.lookupRule(/*RID=*/0); EXPECT_THAT(Rule, ExpectedRule); EXPECT_THAT(G.symbolName(Rule.seq()[0]), "IDENTIFIER"); EXPECT_THAT(G.symbolName(Rule.seq()[1]), "+"); EXPECT_THAT(G.symbolName(Rule.seq()[2]), "_"); } TEST_F(GrammarTest, EliminatedOptional) { build("_ := CONST_opt INT ;_opt"); EXPECT_THAT(Diags, IsEmpty()); EXPECT_THAT(G.table().Rules, UnorderedElementsAre(Sequence(id("INT")), Sequence(id("CONST"), id("INT")), Sequence(id("CONST"), id("INT"), id(";")), Sequence(id("INT"), id(";")))); } TEST_F(GrammarTest, RuleIDSorted) { build(R"bnf( _ := x x := y y := z z := IDENTIFIER )bnf"); ASSERT_TRUE(Diags.empty()); EXPECT_LT(ruleFor("z"), ruleFor("y")); EXPECT_LT(ruleFor("y"), ruleFor("x")); EXPECT_LT(ruleFor("x"), ruleFor("_")); } TEST_F(GrammarTest, Annotation) { build(R"bnf( _ := x x := IDENTIFIER [guard] )bnf"); ASSERT_THAT(Diags, IsEmpty()); EXPECT_FALSE(G.lookupRule(ruleFor("_")).Guarded); EXPECT_TRUE(G.lookupRule(ruleFor("x")).Guarded); } TEST_F(GrammarTest, Diagnostics) { build(R"cpp( _ := ,_opt _ := undefined-sym null := _ := IDENFIFIE # a typo of the terminal IDENFITIER invalid # cycle a := b b := a _ := IDENTIFIER [unknown=value] )cpp"); EXPECT_EQ(G.underscore(), id("_")); EXPECT_THAT(Diags, UnorderedElementsAre( "Rule '_ := ,_opt' has a nullable RHS", "Rule 'null := ' has a nullable RHS", "No rules for nonterminal: undefined-sym", "Failed to parse 'invalid': no separator :=", "Token-like name IDENFIFIE is used as a nonterminal", "No rules for nonterminal: IDENFIFIE", "The grammar contains a cycle involving symbol a", "Unknown attribute 'unknown'")); } TEST_F(GrammarTest, DuplicatedDiagnostics) { build(R"cpp( _ := test test := INT test := DOUBLE test := INT )cpp"); EXPECT_THAT(Diags, UnorderedElementsAre("Duplicate rule: `test := INT`")); } TEST_F(GrammarTest, FirstAndFollowSets) { build( R"bnf( _ := expr expr := expr - term expr := term term := IDENTIFIER term := ( expr ) )bnf"); ASSERT_TRUE(Diags.empty()); auto ToPairs = [](std::vector> Input) { std::vector>> Sets; for (SymbolID ID = 0; ID < Input.size(); ++ID) Sets.emplace_back(ID, std::move(Input[ID])); return Sets; }; EXPECT_THAT( ToPairs(firstSets(G)), UnorderedElementsAre( Pair(id("_"), UnorderedElementsAre(id("IDENTIFIER"), id("("))), Pair(id("expr"), UnorderedElementsAre(id("IDENTIFIER"), id("("))), Pair(id("term"), UnorderedElementsAre(id("IDENTIFIER"), id("("))))); EXPECT_THAT( ToPairs(followSets(G)), UnorderedElementsAre( Pair(id("_"), UnorderedElementsAre(id("EOF"))), Pair(id("expr"), UnorderedElementsAre(id("-"), id("EOF"), id(")"))), Pair(id("term"), UnorderedElementsAre(id("-"), id("EOF"), id(")"))))); build(R"bnf( # A simplfied C++ decl-specifier-seq. _ := decl-specifier-seq decl-specifier-seq := decl-specifier decl-specifier-seq decl-specifier-seq := decl-specifier decl-specifier := simple-type-specifier decl-specifier := INLINE simple-type-specifier := INT )bnf"); ASSERT_TRUE(Diags.empty()); EXPECT_THAT( ToPairs(firstSets(G)), UnorderedElementsAre( Pair(id("_"), UnorderedElementsAre(id("INLINE"), id("INT"))), Pair(id("decl-specifier-seq"), UnorderedElementsAre(id("INLINE"), id("INT"))), Pair(id("simple-type-specifier"), UnorderedElementsAre(id("INT"))), Pair(id("decl-specifier"), UnorderedElementsAre(id("INLINE"), id("INT"))))); EXPECT_THAT( ToPairs(followSets(G)), UnorderedElementsAre( Pair(id("_"), UnorderedElementsAre(id("EOF"))), Pair(id("decl-specifier-seq"), UnorderedElementsAre(id("EOF"))), Pair(id("decl-specifier"), UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF"))), Pair(id("simple-type-specifier"), UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF"))))); } } // namespace } // namespace pseudo } // namespace clang