//===--- TokenTest.cpp ----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang-pseudo/Token.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TokenKinds.h" #include "gmock/gmock.h" #include "gtest/gtest.h" namespace clang { namespace pseudo { namespace { using testing::AllOf; using testing::ElementsAre; using testing::ElementsAreArray; using testing::Not; MATCHER_P2(token, Text, Kind, "") { return arg.Kind == Kind && arg.text() == Text; } MATCHER_P(hasFlag, Flag, "") { return arg.flag(Flag); } MATCHER_P2(lineIndent, Line, Indent, "") { return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent; } MATCHER_P(originalIndex, index, "") { return arg.OriginalIndex == (Token::Index)index; } TEST(TokenTest, Lex) { LangOptions Opts; std::string Code = R"cpp( #include int main() { return 42; // the answer } )cpp"; TokenStream Raw = lex(Code, Opts); ASSERT_TRUE(Raw.isFinalized()); EXPECT_THAT(Raw.tokens(), ElementsAreArray({ // Lexing of directives is weird, especially strings. token("#", tok::hash), token("include", tok::raw_identifier), token("<", tok::less), token("stdio", tok::raw_identifier), token(".", tok::period), token("h", tok::raw_identifier), token(">", tok::greater), token("int", tok::raw_identifier), token("main", tok::raw_identifier), token("(", tok::l_paren), token(")", tok::r_paren), token("{", tok::l_brace), token("return", tok::raw_identifier), token("42", tok::numeric_constant), token(";", tok::semi), token("// the answer", tok::comment), token("}", tok::r_brace), })); TokenStream Cooked = cook(Raw, Opts); ASSERT_TRUE(Cooked.isFinalized()); EXPECT_THAT(Cooked.tokens(), ElementsAreArray({ // Cooked identifier types in directives are not meaningful. token("#", tok::hash), token("include", tok::identifier), token("<", tok::less), token("stdio", tok::identifier), token(".", tok::period), token("h", tok::identifier), token(">", tok::greater), token("int", tok::kw_int), token("main", tok::identifier), token("(", tok::l_paren), token(")", tok::r_paren), token("{", tok::l_brace), token("return", tok::kw_return), token("42", tok::numeric_constant), token(";", tok::semi), token("// the answer", tok::comment), token("}", tok::r_brace), })); // Check raw tokens point back into original source code. EXPECT_EQ(Raw.tokens().front().text().begin(), &Code[Code.find('#')]); } TEST(TokenTest, LineContinuation) { LangOptions Opts; std::string Code = R"cpp( one_\ token two \ tokens )cpp"; TokenStream Raw = lex(Code, Opts); EXPECT_THAT( Raw.tokens(), ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier), hasFlag(LexFlags::StartsPPLine), hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0), originalIndex(0)), AllOf(token("two", tok::raw_identifier), hasFlag(LexFlags::StartsPPLine), Not(hasFlag(LexFlags::NeedsCleaning)), originalIndex(1)), AllOf(token("\\\ntokens", tok::raw_identifier), Not(hasFlag(LexFlags::StartsPPLine)), hasFlag(LexFlags::NeedsCleaning), originalIndex(2)))); TokenStream Cooked = cook(Raw, Opts); EXPECT_THAT( Cooked.tokens(), ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0), originalIndex(0)), AllOf(token("two", tok::identifier), originalIndex(1)), AllOf(token("tokens", tok::identifier), originalIndex(2)))); } TEST(TokenTest, EncodedCharacters) { LangOptions Opts; Opts.Trigraphs = true; Opts.Digraphs = true; Opts.C99 = true; // UCNs Opts.CXXOperatorNames = true; std::string Code = R"(and <: ??! '??=' \u00E9)"; TokenStream Raw = lex(Code, Opts); EXPECT_THAT( Raw.tokens(), ElementsAre( // and is not recognized as && until cook(). AllOf(token("and", tok::raw_identifier), Not(hasFlag(LexFlags::NeedsCleaning))), // Digraphs are just different spellings of tokens. AllOf(token("<:", tok::l_square), Not(hasFlag(LexFlags::NeedsCleaning))), // Trigraps are interpreted, still need text cleaning. AllOf(token(R"(??!)", tok::pipe), hasFlag(LexFlags::NeedsCleaning)), // Trigraphs must be substituted inside constants too. AllOf(token(R"('??=')", tok::char_constant), hasFlag(LexFlags::NeedsCleaning)), // UCNs need substitution. AllOf(token(R"(\u00E9)", tok::raw_identifier), hasFlag(LexFlags::NeedsCleaning)))); TokenStream Cooked = cook(Raw, Opts); EXPECT_THAT( Cooked.tokens(), ElementsAre(token("and", tok::ampamp), // alternate spelling recognized token("<:", tok::l_square), token("|", tok::pipe), // trigraph substituted token("'#'", tok::char_constant), // trigraph substituted token("é", tok::identifier))); // UCN substituted } TEST(TokenTest, Indentation) { LangOptions Opts; std::string Code = R"cpp( hello world no_indent \ line_was_continued )cpp"; TokenStream Raw = lex(Code, Opts); EXPECT_THAT(Raw.tokens(), ElementsAreArray({ lineIndent(0, 3), // hello lineIndent(0, 3), // world lineIndent(1, 0), // no_indent lineIndent(2, 2), // line_was_continued })); } TEST(TokenTest, SplitGreaterGreater) { LangOptions Opts; std::string Code = R"cpp( >> // split // >> with an escaped newline in the middle, split >\ > >>= // not split )cpp"; TokenStream Cook = cook(lex(Code, Opts), Opts); TokenStream Split = stripComments(Cook); EXPECT_THAT(Split.tokens(), ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)), AllOf(token(">", tok::greater), originalIndex(0)), // Token 1 and 2 are comments. AllOf(token(">", tok::greater), originalIndex(3)), AllOf(token(">", tok::greater), originalIndex(3)), AllOf(token(">>=", tok::greatergreaterequal), originalIndex(4)))); } TEST(TokenTest, DropComments) { LangOptions Opts; std::string Code = R"cpp( // comment int /*abc*/; )cpp"; TokenStream Raw = cook(lex(Code, Opts), Opts); TokenStream Stripped = stripComments(Raw); EXPECT_THAT( Raw.tokens(), ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)), AllOf(token("int", tok::kw_int), originalIndex(1)), AllOf(token("/*abc*/", tok::comment), originalIndex(2)), AllOf(token(";", tok::semi), originalIndex(3)))); EXPECT_THAT(Stripped.tokens(), ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)), AllOf(token(";", tok::semi), originalIndex(3)))); } } // namespace } // namespace pseudo } // namespace clang