From aa2beb32f1cb23f525cb9bd89f5377110ee3382a Mon Sep 17 00:00:00 2001 From: Sam Vervaeck Date: Fri, 14 Feb 2025 19:59:35 +0100 Subject: [PATCH] Use embedded LLVM and fix compile errors --- CMakeLists.txt | 77 +++++++++++++++++++-------------- src/LLVMCodeGen.cc | 102 ++++++++++++++++++++++++++++++++++++++------ src/LLVMCodeGen.hpp | 37 +++++++++++++--- src/main.cc | 84 ++++++++++++++++++++++++------------ 4 files changed, 220 insertions(+), 80 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a11b4b15..14c4280f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,11 +3,29 @@ cmake_minimum_required(VERSION 3.20) project(Bolt C CXX) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules") + +option(BOLT_ENABLE_LLVM "Enable using LLVM as a back-end" ON) +option(BOLT_ENABLE_CLANG "Enable integration with the Clang compiler" ON) + +if (NOT BOLT_ENABLE_LLVM AND BOLT_ENABLE_CLANG) + message(FATAL_ERROR "BOLT_ENABLE_LLVM must be set when enabling BOLT_ENABLE_CLANG") +endif() + set(CMAKE_CXX_STANDARD 20) -add_subdirectory(deps/zen EXCLUDE_FROM_ALL) -# add_subdirectory(deps/llvm-project/llvm EXCLUDE_FROM_ALL) +if (BOLT_ENABLE_LLVM) + add_subdirectory(deps/llvm/llvm EXCLUDE_FROM_ALL) + # find_package(LLVM 18 REQUIRED all-targets) + # if (BOLT_ENABLE_CLANG) + # find_package(Clang 18 REQUIRED) + # endif() +endif() +add_subdirectory(deps/zen EXCLUDE_FROM_ALL) +add_subdirectory(deps/fmt EXCLUDE_FROM_ALL) + +# FIXME temporary solution set(ICU_DIR "${CMAKE_CURRENT_SOURCE_DIR}/build/icu/install") set(ICU_CFLAGS "-DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=explicit -DU_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -DU_HIDE_OBSOLETE_UTF_OLD_H=1") set(ICU_INCLUDE_DIRS "${ICU_DIR}/include") @@ -18,8 +36,6 @@ if (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" OR CMAKE_BUILD_TYPE STREQUAL "Deb set(BOLT_DEBUG ON) endif() -#find_package(LLVM 19.0 REQUIRED) - add_library( BoltCore #src/Text.cc @@ -38,15 +54,30 @@ target_link_directories(
BoltCore PUBLIC ${ICU_LIBRARY_DIRS} + ${CLANG_LIBDIRS} ) target_compile_options( BoltCore PUBLIC -Werror -fno-exceptions + #-fno-rtti ${ICU_CFLAGS} ) - +target_include_directories( + BoltCore + PUBLIC + include + ${ICU_LIBRARY_DIRS} + ${CLANG_INCLUDE_DIRS} +) +target_link_libraries( + BoltCore + PUBLIC + zen + icuuc + ${CLANG_LIBRARIES} +) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND BOLT_DEBUG) target_compile_options( BoltCore @@ -54,37 +85,18 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND BOLT_DEBUG) -fstandalone-debug ) endif() -target_include_directories( - BoltCore - PUBLIC - include - ${ICU_LIBRARY_DIRS} + +add_library( + BoltLLVM + src/LLVMCodeGen.cc ) target_link_libraries( - BoltCore + BoltLLVM PUBLIC - zen - icuuc + BoltCore + LLVM ) -# add_library( -# BoltLLVM -# src/LLVMCodeGen.cc -# ) -# target_link_libraries( -# BoltLLVM -# PUBLIC -# BoltCore -# LLVMCore -# LLVMTarget -# ) -# target_include_directories( -# BoltLLVM -# PUBLIC -# deps/llvm-project/llvm/include # FIXME this is a hack -# ${CMAKE_BINARY_DIR}/deps/llvm-project/llvm/include # FIXME this is a hack -# ) - add_executable( bolt src/main.cc @@ -93,7 +105,8 @@ target_link_libraries( bolt PUBLIC BoltCore - # BoltLLVM + BoltLLVM + fmt::fmt ) if (BOLT_ENABLE_TESTS) diff --git a/src/LLVMCodeGen.cc b/src/LLVMCodeGen.cc index 56955e871..9ef7019c4 100644 --- a/src/LLVMCodeGen.cc +++ b/src/LLVMCodeGen.cc @@ -2,17 +2,34 @@ #include #include +#include "llvm/IR/Module.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Function.h" + #include "bolt/CST.hpp" -#include "bolt/CSTVisitor.hpp" +#include "bolt/Type.hpp" +#include "bolt/Checker.hpp" #include "LLVMCodeGen.hpp" namespace bolt { -LLVMCodeGen::LLVMCodeGen(llvm::LLVMContext* TheContext): - TheContext(TheContext) {} +LLVMCodeGen::LLVMCodeGen(llvm::LLVMContext& TheContext, Checker& TheChecker): + TheContext(TheContext), TheChecker(TheChecker) { +
IntBitWidth = 64; + IntType = llvm::Type::getIntNTy(TheContext, IntBitWidth); + BoolType = llvm::Type::getInt1Ty(TheContext); + UnitType = llvm::StructType::get(TheContext); + StringType = llvm::PointerType::getUnqual(llvm::Type::getInt8Ty(TheContext)); + Types.emplace("Int", IntType); + Types.emplace("Bool", BoolType); + Types.emplace("String", StringType); + } -llvm::Value* LLVMCodeGen::generateExpression(Expression* E) { +llvm::Value* LLVMCodeGen::generateExpression(Expression* E, llvm::BasicBlock* BB) { switch (E->getKind()) { @@ -23,8 +40,8 @@ llvm::Value* LLVMCodeGen::generateExpression(Expression* E) { case NodeKind::IntegerLiteral: { auto V = static_cast(Lit->Token)->V; - // TODO assert that V fits in the APInt - return llvm::ConstantInt::get(*TheContext, llvm::APInt(32, V)); + ZEN_ASSERT(V < std::pow(2, IntBitWidth)); + return llvm::ConstantInt::get(TheContext, llvm::APInt(IntBitWidth, V)); } case NodeKind::StringLiteral: { @@ -36,6 +53,19 @@ llvm::Value* LLVMCodeGen::generateExpression(Expression* E) { } } + case NodeKind::ReturnExpression: + { + auto Return = static_cast(E); + std::optional Value; + if (Return->hasExpression()) { + auto Value = generateExpression(Return->getExpression(), BB); + Builder->CreateRet(Value); + } else { + Builder->CreateRetVoid(); + } + return llvm::ConstantStruct::get(UnitType, {}); + } + default: ZEN_UNREACHABLE @@ -43,23 +73,67 @@ llvm::Value* LLVMCodeGen::generateExpression(Expression* E) { } -void LLVMCodeGen::generateElement(Node* N) { - switch (N->getKind()) { - case NodeKind::ExpressionStatement: +llvm::Type* LLVMCodeGen::generateType(Type* Ty) { + + std::vector ParamTypes; + while (Ty->getKind() == TypeKind::Fun) { + auto Fun = static_cast(Ty); + ParamTypes.push_back(Fun->getLeft()); + Ty = Fun->getRight(); + } + + switch (Ty->getKind()) { + + case TypeKind::Con: { - auto Stmt = static_cast(N); - generateExpression(Stmt->Expression); + auto Con = static_cast(Ty); + auto Match = Types.find(ByteString { Con->getName()
}); + ZEN_ASSERT(Match != Types.end()); + return Match->second; } + default: ZEN_UNREACHABLE + } } -void LLVMCodeGen::generate(SourceFile* SF) { - Module = std::make_unique(SF->File.getPath(), *TheContext); +void LLVMCodeGen::generateFunctionDeclaration(FunctionDeclaration* Decl, llvm::BasicBlock* BB) { + auto Ty = generateType(TheChecker.getTypeOfNode(Decl)); + +} + +void LLVMCodeGen::generateElement(Node* N, llvm::BasicBlock* BB) { + + if (isa(N)) { + auto Expr = static_cast(N); + generateExpression(Expr, BB); + return; + } + + switch (N->getKind()) { + + case NodeKind::NamedFunctionDeclaration: + case NodeKind::PrefixFunctionDeclaration: + case NodeKind::InfixFunctionDeclaration: + case NodeKind::SuffixFunctionDeclaration: + return generateFunctionDeclaration(static_cast(N), BB); + + default: + ZEN_UNREACHABLE + + } + +} + +std::unique_ptr LLVMCodeGen::generate(SourceFile* SF) { + auto TheModule = std::make_unique(SF->File.getPath(), TheContext); + auto MainType = llvm::FunctionType::get(IntType, std::vector { IntType }, false); + auto Main = llvm::Function::Create(MainType, llvm::Function::ExternalLinkage, "main", TheModule.get()); for (auto Element: SF->Elements) { - generateElement(Element); + generateElement(Element, &Main->getEntryBlock()); } + return TheModule; } } diff --git a/src/LLVMCodeGen.hpp b/src/LLVMCodeGen.hpp index e66501ea9..126583b51 100644 --- a/src/LLVMCodeGen.hpp +++ b/src/LLVMCodeGen.hpp @@ -1,32 +1,55 @@ #pragma once +#include #include #include "llvm/IR/IRBuilder.h" +#include "bolt/ByteString.hpp" + namespace bolt { +class Checker; +class Type; class Node; class SourceFile; class Expression; +class FunctionDeclaration; class LLVMCodeGen { - llvm::LLVMContext* TheContext; + llvm::LLVMContext& TheContext; + + Checker& TheChecker; + + std::unordered_map Types; + + unsigned int IntBitWidth; + llvm::Type* IntType; + llvm::Type* BoolType; + llvm::StructType* UnitType; + + llvm::PointerType* StringType; std::unique_ptr> Builder; -
std::unique_ptr Module; - public: - LLVMCodeGen(llvm::LLVMContext* TheContext); + LLVMCodeGen( + llvm::LLVMContext& TheContext, + Checker& TheChecker + ); - llvm::Value* generateExpression(Expression* E); + llvm::Value* generateExpression(Expression* Expr, llvm::BasicBlock* BB); - void generateElement(Node* Element); - void generate(SourceFile* SF); + llvm::Type* generateType(Type* Ty); + + void generateFunctionDeclaration(FunctionDeclaration* Decl, llvm::BasicBlock* BB); + + void generateElement(Node* Element, llvm::BasicBlock* BB); + + std::unique_ptr generate(SourceFile* SF); }; diff --git a/src/main.cc b/src/main.cc index 05c7ca954..86f6b84b8 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1,12 +1,18 @@ -#include - +#include #include #include #include #include +#include "llvm/IR/Module.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Path.h" + #include "zen/po.hpp" +#include "zen/fs/io.hpp" #include "bolt/CST.hpp" #include "bolt/CSTVisitor.hpp" @@ -19,6 +25,8 @@ #include "bolt/Evaluator.hpp" #include "bolt/Program.hpp" +#include "LLVMCodeGen.hpp" + using namespace bolt; /** @@ -27,21 +35,6 @@ using namespace bolt; */ const constexpr int XARGS_STOP_LOOP = 255; -ByteString readFile(std::string Path) { - - std::ifstream File(Path); - ByteString Out; - - File.seekg(0, std::ios::end); - Out.reserve(File.tellg()); - File.seekg(0, std::ios::beg); - - Out.assign((std::istreambuf_iterator(File)), - std::istreambuf_iterator()); - - return Out; -} - namespace po = zen::po; auto getAllTokens(Stream& S) { @@ -67,10 +60,12 @@ int main(int Argc, const char* Argv[]) { .subcommand( po::command("verify", "Verify integrity of the compiler on selected file(s)") .pos_arg("file", po::some)) + .subcommand( + po::command("build", "Build sources into a library or executable") + .pos_arg("file", po::some)) .subcommand( po::command("eval", "Run sources") - .pos_arg("file", po::some) - .fallback()) +
.pos_arg("file", po::some)) .parse_args(Argc, Argv) .unwrap(); @@ -91,16 +86,21 @@ int main(int Argc, const char* Argv[]) { for (auto Filename: Submatch->get_pos_args()) { - auto Text = readFile(Filename); + auto ReadResult = zen::fs::read_file(Filename); + if (!ReadResult) { + DE.add(Filename, ReadResult.left()); + continue; + } + ByteString Text { ReadResult->c_str(), ReadResult->size() }; TextFile File { Filename, Text }; - VectorStream Chars(Text, EOF); - Scanner S(DE, File, Chars); - Punctuator PT(S); - auto Buffer = getAllTokens(PT); - Parser P(File, DE); + VectorStream Chars { Text, EOF }; + Scanner TheScanner(DE, File, Chars); + Punctuator ThePunctuator(TheScanner); + auto Buffer = getAllTokens(ThePunctuator); + Parser TheParser(File, DE); TokenStream Tokens { Buffer }; - auto SF = P.parseSourceFile(Tokens); + auto SF = TheParser.parseSourceFile(Tokens); if (SF == nullptr) { continue; } @@ -201,7 +201,36 @@ int main(int Argc, const char* Argv[]) { return 255; } - if (Name == "eval") { + if (Name == "build") { + + // auto HostABI = "x86_64"; + // auto TripleStr = "x86_64-pc-linux-gnu"; + + // std::string Error; + // auto Target = llvm::TargetRegistry::lookupTarget(TripleStr, Error); + // if (!Target) { + // error("failed to create codegen target: {}\n", Error); + // return 255; + // } + + llvm::LLVMContext TheContext; + for (auto SF: Prog.getSourceFiles()) { + + LLVMCodeGen CG { TheContext, Prog.getTypeChecker(SF) }; + auto Module = CG.generate(SF); + + auto SourcePath = SF->getFilePath(); + + auto IRPath = SourcePath.parent_path() / (SourcePath.stem().string() + ".ll"); + + std::cerr << IRPath << "\n"; + + // std::error_code EC; + // llvm::raw_fd_ostream OS { IRPath, EC }; + // Module->print(OS, nullptr); + } + + } else if (Name == "eval") { Evaluator E; Env GlobalEnv; GlobalEnv.add("print", Value::binding([](auto Args) { @@ -213,6 +242,7 @@ int main(int Argc, const char* Argv[]) { // TODO add a SourceFile-local env that inherits from GlobalEnv
E.evaluate(SF, GlobalEnv); } + } return 0;