From 8e3521f6e06ee217cb8d395d0ab4de6e6f610dbb Mon Sep 17 00:00:00 2001 From: "Prof. Ronald Moore" <ronald.moore@h-da.de> Date: Wed, 29 Apr 2020 15:56:49 +0200 Subject: [PATCH] Imported LLVM example --- LLVM/cplusplus/KaleidoscopeJIT.h | 139 +++ LLVM/cplusplus/LICENSE.TXT | 279 ++++++ LLVM/cplusplus/Makefile | 103 ++ LLVM/cplusplus/ModuleMaker.cpp | 70 ++ LLVM/cplusplus/README.md | 132 +++ LLVM/cplusplus/fibonacci.cpp | 148 +++ LLVM/cplusplus/tests/fibonacci.kal | 9 + LLVM/cplusplus/tests/test2.kal | 4 + LLVM/cplusplus/tests/test3.kal | 7 + LLVM/cplusplus/tests/test4.kal | 12 + LLVM/cplusplus/tests/test5.kal | 26 + LLVM/cplusplus/tests/test6.kal | 101 ++ LLVM/cplusplus/tests/test7.kal | 22 + LLVM/cplusplus/tests/test8.cpp | 9 + LLVM/cplusplus/tests/test8.kal | 1 + LLVM/cplusplus/tests/test9.kal | 7 + LLVM/cplusplus/toy2.cpp | 447 +++++++++ LLVM/cplusplus/toy3.cpp | 610 ++++++++++++ LLVM/cplusplus/toy4.cpp | 701 ++++++++++++++ LLVM/cplusplus/toy5.cpp | 975 +++++++++++++++++++ LLVM/cplusplus/toy6.cpp | 1094 +++++++++++++++++++++ LLVM/cplusplus/toy7.cpp | 1265 ++++++++++++++++++++++++ LLVM/cplusplus/toy8.cpp | 1270 ++++++++++++++++++++++++ LLVM/cplusplus/toy9.cpp | 1452 ++++++++++++++++++++++++++++ 24 files changed, 8883 insertions(+) create mode 100644 LLVM/cplusplus/KaleidoscopeJIT.h create mode 100644 LLVM/cplusplus/LICENSE.TXT create mode 100644 LLVM/cplusplus/Makefile create mode 100644 LLVM/cplusplus/ModuleMaker.cpp create mode 100644 LLVM/cplusplus/README.md create mode 100644 LLVM/cplusplus/fibonacci.cpp create mode 100644 LLVM/cplusplus/tests/fibonacci.kal create mode 100644 LLVM/cplusplus/tests/test2.kal create mode 100644 LLVM/cplusplus/tests/test3.kal create mode 100644 LLVM/cplusplus/tests/test4.kal create mode 100644 LLVM/cplusplus/tests/test5.kal create mode 100644 LLVM/cplusplus/tests/test6.kal create mode 100644 LLVM/cplusplus/tests/test7.kal create mode 100644 LLVM/cplusplus/tests/test8.cpp create mode 100644 LLVM/cplusplus/tests/test8.kal create mode 100644 LLVM/cplusplus/tests/test9.kal create mode 100644 LLVM/cplusplus/toy2.cpp create mode 100644 LLVM/cplusplus/toy3.cpp create mode 100644 LLVM/cplusplus/toy4.cpp create mode 100644 LLVM/cplusplus/toy5.cpp create mode 100644 LLVM/cplusplus/toy6.cpp create mode 100644 LLVM/cplusplus/toy7.cpp create mode 100644 LLVM/cplusplus/toy8.cpp create mode 100644 LLVM/cplusplus/toy9.cpp diff --git a/LLVM/cplusplus/KaleidoscopeJIT.h b/LLVM/cplusplus/KaleidoscopeJIT.h new file mode 100644 index 0000000..a253a97 --- /dev/null +++ b/LLVM/cplusplus/KaleidoscopeJIT.h @@ -0,0 +1,139 @@ +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Contains a simple JIT definition for use in the kaleidoscope tutorials. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H +#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <map> +#include <memory> +#include <string> +#include <vector> + +namespace llvm { +namespace orc { + +class KaleidoscopeJIT { +public: + using ObjLayerT = LegacyRTDyldObjectLinkingLayer; + using CompileLayerT = LegacyIRCompileLayer<ObjLayerT, SimpleCompiler>; + + KaleidoscopeJIT() + : Resolver(createLegacyLookupResolver( + ES, + [this](const std::string &Name) { return findMangledSymbol(Name); }, + [](Error Err) { cantFail(std::move(Err), "lookupFlags failed"); })), + TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer(AcknowledgeORCv1Deprecation, ES, + [this](VModuleKey) { + return ObjLayerT::Resources{ + std::make_shared<SectionMemoryManager>(), Resolver}; + }), + CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer, + SimpleCompiler(*TM)) { + llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); + } + + TargetMachine &getTargetMachine() { return *TM; } + + VModuleKey addModule(std::unique_ptr<Module> M) { + auto K = ES.allocateVModule(); + cantFail(CompileLayer.addModule(K, std::move(M))); + ModuleKeys.push_back(K); + return K; + } + + void removeModule(VModuleKey K) { + ModuleKeys.erase(find(ModuleKeys, K)); + cantFail(CompileLayer.removeModule(K)); + } + + JITSymbol findSymbol(const std::string Name) { + return findMangledSymbol(mangle(Name)); + } + +private: + std::string mangle(const std::string &Name) { + std::string MangledName; + { + raw_string_ostream MangledNameStream(MangledName); + Mangler::getNameWithPrefix(MangledNameStream, Name, DL); + } + return MangledName; + } + + JITSymbol findMangledSymbol(const std::string &Name) { +#ifdef _WIN32 + // The symbol lookup of ObjectLinkingLayer uses the SymbolRef::SF_Exported + // flag to decide whether a symbol will be visible or not, when we call + // IRCompileLayer::findSymbolIn with ExportedSymbolsOnly set to true. + // + // But for Windows COFF objects, this flag is currently never set. + // For a potential solution see: https://reviews.llvm.org/rL258665 + // For now, we allow non-exported symbols on Windows as a workaround. + const bool ExportedSymbolsOnly = false; +#else + const bool ExportedSymbolsOnly = true; +#endif + + // Search modules in reverse order: from last added to first added. + // This is the opposite of the usual search order for dlsym, but makes more + // sense in a REPL where we want to bind to the newest available definition. + for (auto H : make_range(ModuleKeys.rbegin(), ModuleKeys.rend())) + if (auto Sym = CompileLayer.findSymbolIn(H, Name, ExportedSymbolsOnly)) + return Sym; + + // If we can't find the symbol in the JIT, try looking in the host process. + if (auto SymAddr = RTDyldMemoryManager::getSymbolAddressInProcess(Name)) + return JITSymbol(SymAddr, JITSymbolFlags::Exported); + +#ifdef _WIN32 + // For Windows retry without "_" at beginning, as RTDyldMemoryManager uses + // GetProcAddress and standard libraries like msvcrt.dll use names + // with and without "_" (for example "_itoa" but "sin"). + if (Name.length() > 2 && Name[0] == '_') + if (auto SymAddr = + RTDyldMemoryManager::getSymbolAddressInProcess(Name.substr(1))) + return JITSymbol(SymAddr, JITSymbolFlags::Exported); +#endif + + return nullptr; + } + + ExecutionSession ES; + std::shared_ptr<SymbolResolver> Resolver; + std::unique_ptr<TargetMachine> TM; + const DataLayout DL; + ObjLayerT ObjectLayer; + CompileLayerT CompileLayer; + std::vector<VModuleKey> ModuleKeys; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H diff --git a/LLVM/cplusplus/LICENSE.TXT b/LLVM/cplusplus/LICENSE.TXT new file mode 100644 index 0000000..fa6ac54 --- /dev/null +++ b/LLVM/cplusplus/LICENSE.TXT @@ -0,0 +1,279 @@ +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2019 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + diff --git a/LLVM/cplusplus/Makefile b/LLVM/cplusplus/Makefile new file mode 100644 index 0000000..5234e25 --- /dev/null +++ b/LLVM/cplusplus/Makefile @@ -0,0 +1,103 @@ +# Very rudimentary Makefile for the llvm tutorial +# See http://llvm.org/docs/tutorial/ +# See also http://koichitamura.blogspot.de/2011/01/since-i-went-to-held-several-weeks-ago.html +# for a necessary "fix" (!) +# +# This Makefile written by Prof. R. C. Moore, fbi.h-da.de + +PROGS := ModuleMaker fibonacci toy2 toy3 toy4 toy5 toy6 toy7 toy8 toy9 + +# Uncomment only one of the next two lines (choose your c++ compiler) +# CC=g++ +CC := clang++ + +# Now, how to compile in LLVM? +# LLVM VERSION 3.9.1 (2017) and possibly LLVM Version 4 +# The Tutorial (Chapter 3) does it like this: +# clang++ -g -O3 toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -o toy +# based on that, and some experimentation, we got, for LLVM 3.6... +# CFLAGS=-std=c++11 `llvm-config --cxxflags --ldflags --system-libs --libs all` -rdynamic -O3 +# Now, for llvm >= 3.7.1, further experience has brought us to.... +# At one time, we piped the output of llvm-config to sed s/no-maybe/no/ . +# Hopefully this is no longer necessary... ? +# (sed was used to hide an incompatibility between llvm-config and clang) + +LLVM_FLAGS := `llvm-config --cxxflags --ldflags --system-libs --libs all ` + +CFLAGS := $(LLVM_FLAGS) -rdynamic -O3 + + +## More preliminaries +# See https://www.gnu.org/software/make/manual/html_node/Special-Targets.html +# In this makefile, we want to keep going even if we find errors +.IGNORE : + +# Further, we do not want multiple things built at once, even if make called with -j2 +.NOTPARALLEL : + +# Tell make that the following "targets" are "phony" +# Cf. https://www.gnu.org/software/make/manual/html_node/Phony-Targets.html#Phony-Targets +.PHONY : all tests clean testclean + +## Now, the targets -- the things that will get made! + +all: $(PROGS) + +$(PROGS): %: %.cpp + $(CC) -g $< $(CFLAGS) -o $@ + +clean: testclean + $(RM) -fv *~ $(PROGS) ModuleMaker.bc fib test8 test8.o test9 + +testclean: + rm -fv *~ *.output test8.o test9 + +testModuleMaker: ModuleMaker + echo && echo "Testing ModuleMaker (expect 5)...\n" + @./ModuleMaker >ModuleMaker.bc + @lli ModuleMaker.bc || echo $$? + +# WARNING: testFibonacci broken (with LLVM and clang 3.7.1) +testFibonacci: fibonacci + echo && echo "\n===================> Testing fibonacci (expect 46368)...\n" + -./fibonacci + +test2: toy2 tests/test2.kal + echo && echo "\n===================> Testing toy2.. (expecting 1 error)\n" + -./toy2 <tests/test2.kal + +test3: toy3 tests/test3.kal + echo && echo "\n===================> Testing toy3...\n" + -./toy3 <tests/test3.kal + +test4: toy4 tests/test4.kal + echo && echo "\n===================> Testing toy4...\n" + -./toy4 <tests/test4.kal + +test5: toy5 tests/test5.kal + echo && echo "\n===================> Testing toy5...\n" + -./toy5 <tests/test5.kal + +test6: toy6 tests/test6.kal + echo && echo "\n===================> Testing toy6...\n" + -./toy6 <tests/test6.kal + +test7: toy7 tests/test7.kal + echo && echo "\n===================> Testing toy7...\n" + -./toy7 <tests/test7.kal + +test8: toy8 tests/test8.kal toy8.cpp + echo && echo "\n===================> Testing toy8... (expect 42, of course)\n" + -./toy8 <tests/test8.kal + -clang++ tests/test8.cpp test8.o -o test8 + -./test8 + +test9: toy9 tests/test9.kal + echo && echo "\n===================> Testing toy9...\n" + -./toy9 <tests/test9.kal |& clang -x ir - -o test9 + -./test9 + +# WARNING: test9 removed since broken (with LLVM and clang <= 3.9) +# NOTE: fibonacci added BACK to tests since it's working again! +tests: testModuleMaker testFibonacci test2 test3 test4 test5 test6 test7 test8 + diff --git a/LLVM/cplusplus/ModuleMaker.cpp b/LLVM/cplusplus/ModuleMaker.cpp new file mode 100644 index 0000000..beff40a --- /dev/null +++ b/LLVM/cplusplus/ModuleMaker.cpp @@ -0,0 +1,70 @@ +//===- examples/ModuleMaker/ModuleMaker.cpp - Example project ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This programs is a simple example that creates an LLVM module "from scratch", +// emitting it as a bitcode file to standard out. This is just to show how +// LLVM projects work and to demonstrate some of the LLVM APIs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +int main() { + LLVMContext Context; + + // Create the "module" or "program" or "translation unit" to hold the + // function + Module *M = new Module("test", Context); + + // Create the main function: first create the type 'int ()' + FunctionType *FT = + FunctionType::get(Type::getInt32Ty(Context), /*not vararg*/false); + + // By passing a module as the last parameter to the Function constructor, + // it automatically gets appended to the Module. + Function *F = Function::Create(FT, Function::ExternalLinkage, "main", M); + + // Add a basic block to the function... again, it automatically inserts + // because of the last argument. + BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", F); + + // Get pointers to the constant integers... + Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2); + Value *Three = ConstantInt::get(Type::getInt32Ty(Context), 3); + + // Create the add instruction... does not insert... + Instruction *Add = BinaryOperator::Create(Instruction::Add, Two, Three, + "addresult"); + + // explicitly insert it into the basic block... + BB->getInstList().push_back(Add); + + // Create the return instruction and add it to the basic block + BB->getInstList().push_back(ReturnInst::Create(Context, Add)); + + // Output the bitcode file to stdout + WriteBitcodeToFile(*M, outs()); + + // Delete the module and all of its contents. + delete M; + return 0; +} diff --git a/LLVM/cplusplus/README.md b/LLVM/cplusplus/README.md new file mode 100644 index 0000000..7419b40 --- /dev/null +++ b/LLVM/cplusplus/README.md @@ -0,0 +1,132 @@ +Introduction +============ + +This Directory contains various examples for working with the LLVM tools. + +All interesting files are from the LLVM project - +See http://llvm.org and in particular http://www.llvm.org/docs/ +for more information. + +This collection was prepared by Prof. R. C. Moore, fbi.h-da.de +in May 2017 for the Compiler Construction course. No warranty implied or expressed. +See LICENSE.TXT for the original license. + +This version has been prepared for LLVM 4.0 (using Arch Linux in a vagrant box) +in May 2017. + +This version can be fairly easily adpated to work with LLVM 3.9. +It does NOT work with earlier versions of LLVM. + +All of the example files (fibonacci.cpp, ModuleMaker.cpp and toy*.cpp): + + - are available in the LLVM source tree, for example by running the + following: + svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_400/final/ + or by downloading the whole source tarball, where you will find a directory + named "examples", from: + http://llvm.org/releases/ + or directly from + http://releases.llvm.org/10.0.0/llvm-10.0.0.src.tar.xz + + - the directory structure has been simplified, some files have been renamed, + and a new Makefile is provided which is much simpler than the original cmake + system (but not guarenteed to work outside Linux). Test files have also + been added. See below. + +Still, it should be emphasized that there is nothing really new here -- apart +from the packaging, everything here is from the LLVM project! + +Manifest ("Table of Contents") +=============================== + +You should have received the following files: + + * `fibonacci.cpp` +This is a compiler without a front-end, so to speak, +but with an interpreter built in - see ModuleMaker for +an even simpler example. + * `LICENSE.TXT` +The LLVM Release Open Source License +(taken verbatim from the University of Illinois/NCSA, +https://github.com/llvm/llvm-project/blob/master/llvm/LICENSE.TXT). + * `Makefile` +A Makefile, not supplied by LLVM but written instead +by R. Moore - to make this collection work without the +rest of the LLVM source tree. You will (of course) +need the LLVM tool set installed (clang, clang++ & +llvm-config and the llvm libs, at the very least). + * `ModuleMaker.cpp` +Another example provided in the LLVM source tree. +This file produces binary LLVM bit code for a program +which does nothing but return a status of 5. LLVM +bit code can be run using the LLVM "lli" tool. +Thus, you should be able to do the following: + +``` +make ModuleMaker +./ModuleMaker >status5.bc +lli status5.bc +echo $? # should print "5" +``` + +"Hello World" doesn't get much simpler than this! +(Running "make testModuleMaker" does this, by the way). + * `README.md` +This file. + * `KaleidoscopeJIT.h` +A header file used by some of the toy4.cpp - toy9.cpp. +This version works with LLVM 10. + * `tests` +A directory containing files in the Kaleidoscope +language, taken from the LLVM Tutorial, see +https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/index.html +Some, but not all, of these are used by "make tests". + * `toy2.cpp` - `toy9.cpp` +The source code from the Tutorial, Chapters 2 through 9. + +All of the above Chapters are available at +https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/index.html + +Building +========= + +(1) Install clang and the llvm tool kit. You can either compile from source + (see http://www.llvm.org/docs/GettingStarted.html - or perhaps better, + http://clang.llvm.org/get_started.html) or install binaries that you find + elsewhere (you SHOULD get LLVM 10.0.0 - No guarantees are made for older + OR NEWER versions!). + +(2) Run `make` (or perhaps `make clean` and then `make`). + +(3) If you wish, run `make tests` to run (almost) all the programs delivered + in this colleciton. The output from `make tests` is very long. + Each program can also be tested individually, for example with + `make testFibonacci` or `make testModuleMaker` or `make test5` + (read the Makefile to see all alternatives). + **Note**: `make test9` is broken. We know. + That's why it's not included in `make tests`. + Please feel free to fix it. + +Further Reading +=============== + +These files are only provided to illustrate how LLVM can be used. The code from +the tutorial should be read together with the tutorial. + +More information is available: + + * Overview of the LLVM Documentation: +http://llvm.org/releases/10.0.0/docs/index.html + * The LLVM ("Kaledeiscope") Tutorial: +https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/index.html + * Guide for Programmers: +http://llvm.org/releases/10.0.0/docs/ProgrammersManual.html + * Guide to the LLVM IR (LLVM Intermediate Representation): +http://llvm.org/releases/10.0.0/docs/LangRef.html + + + + + + + diff --git a/LLVM/cplusplus/fibonacci.cpp b/LLVM/cplusplus/fibonacci.cpp new file mode 100644 index 0000000..12393a4 --- /dev/null +++ b/LLVM/cplusplus/fibonacci.cpp @@ -0,0 +1,148 @@ +//===--- examples/Fibonacci/fibonacci.cpp - An example use of the JIT -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This small program provides an example of how to build quickly a small module +// with function Fibonacci and execute it with the JIT. +// +// The goal of this snippet is to create in the memory the LLVM module +// consisting of one function as follow: +// +// int fib(int x) { +// if(x<=2) return 1; +// return fib(x-1)+fib(x-2); +// } +// +// Once we have this, we compile the module via JIT, then execute the `fib' +// function and return result to a driver, i.e. to a "host program". +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APInt.h" +#include "llvm/IR/Verifier.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/MCJIT.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstdlib> +#include <memory> +#include <string> +#include <vector> + +using namespace llvm; + +static Function *CreateFibFunction(Module *M, LLVMContext &Context) { + // Create the fib function and insert it into module M. This function is said + // to return an int and take an int parameter. + FunctionType *FibFTy = FunctionType::get(Type::getInt32Ty(Context), + {Type::getInt32Ty(Context)}, false); + Function *FibF = + Function::Create(FibFTy, Function::ExternalLinkage, "fib", M); + + // Add a basic block to the function. + BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF); + + // Get pointers to the constants. + Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1); + Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2); + + // Get pointer to the integer argument of the add1 function... + Argument *ArgX = &*FibF->arg_begin(); // Get the arg. + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the true_block. + BasicBlock *RetBB = BasicBlock::Create(Context, "return", FibF); + // Create an exit block. + BasicBlock* RecurseBB = BasicBlock::Create(Context, "recurse", FibF); + + // Create the "if (arg <= 2) goto exitbb" + Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond"); + BranchInst::Create(RetBB, RecurseBB, CondInst, BB); + + // Create: ret int 1 + ReturnInst::Create(Context, One, RetBB); + + // create fib(x-1) + Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB); + CallInst *CallFibX1 = CallInst::Create(FibF, Sub, "fibx1", RecurseBB); + CallFibX1->setTailCall(); + + // create fib(x-2) + Sub = BinaryOperator::CreateSub(ArgX, Two, "arg", RecurseBB); + CallInst *CallFibX2 = CallInst::Create(FibF, Sub, "fibx2", RecurseBB); + CallFibX2->setTailCall(); + + // fib(x-1)+fib(x-2) + Value *Sum = BinaryOperator::CreateAdd(CallFibX1, CallFibX2, + "addresult", RecurseBB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(Context, Sum, RecurseBB); + + return FibF; +} + +int main(int argc, char **argv) { + int n = argc > 1 ? atol(argv[1]) : 24; + + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + LLVMContext Context; + + // Create some module to put our function into it. + std::unique_ptr<Module> Owner(new Module("test", Context)); + Module *M = Owner.get(); + + // We are about to create the "fib" function: + Function *FibF = CreateFibFunction(M, Context); + + // Now we going to create JIT + std::string errStr; + ExecutionEngine *EE = + EngineBuilder(std::move(Owner)) + .setErrorStr(&errStr) + .create(); + + if (!EE) { + errs() << argv[0] << ": Failed to construct ExecutionEngine: " << errStr + << "\n"; + return 1; + } + + errs() << "verifying... "; + if (verifyModule(*M)) { + errs() << argv[0] << ": Error constructing function!\n"; + return 1; + } + + errs() << "OK\n"; + errs() << "We just constructed this LLVM module:\n\n---------\n" << *M; + errs() << "---------\nstarting fibonacci(" << n << ") with JIT...\n"; + + // Call the Fibonacci function with argument n: + std::vector<GenericValue> Args(1); + Args[0].IntVal = APInt(32, n); + GenericValue GV = EE->runFunction(FibF, Args); + + // import result of execution + outs() << "Result: " << GV.IntVal << "\n"; + + return 0; +} diff --git a/LLVM/cplusplus/tests/fibonacci.kal b/LLVM/cplusplus/tests/fibonacci.kal new file mode 100644 index 0000000..96f27c3 --- /dev/null +++ b/LLVM/cplusplus/tests/fibonacci.kal @@ -0,0 +1,9 @@ +# Compute the x'th fibonacci number. +def fib(x) + if x < 3 then + 1 + else + fib(x-1)+fib(x-2) + +# This expression will compute the 40th number. +fib(40) diff --git a/LLVM/cplusplus/tests/test2.kal b/LLVM/cplusplus/tests/test2.kal new file mode 100644 index 0000000..0237059 --- /dev/null +++ b/LLVM/cplusplus/tests/test2.kal @@ -0,0 +1,4 @@ +def foo(x y) x+foo(y, 4.0); +def foo(x y) x+y y; +def foo(x y) x+y ); +extern sin(a); diff --git a/LLVM/cplusplus/tests/test3.kal b/LLVM/cplusplus/tests/test3.kal new file mode 100644 index 0000000..2f36ed8 --- /dev/null +++ b/LLVM/cplusplus/tests/test3.kal @@ -0,0 +1,7 @@ +# Compare http://llvm.org/docs/tutorial/LangImpl3.html#driver-changes-and-closing-thoughts +4+5; +def foo(a b) a*a + 2*a*b + b*b; +def bar(a) foo(a, 4.0) + bar(31337); +extern cos(x); +cos(1.234); + diff --git a/LLVM/cplusplus/tests/test4.kal b/LLVM/cplusplus/tests/test4.kal new file mode 100644 index 0000000..97a8f96 --- /dev/null +++ b/LLVM/cplusplus/tests/test4.kal @@ -0,0 +1,12 @@ +# Compare http://llvm.org/docs/tutorial/LangImpl4.html#adding-a-jit-compiler +4+5; +def testfunc(x y) x + y*2; +testfunc(4, 10); +extern sin(x); +extern cos(x); +sin(1.0); +def foo(x) sin(x)*sin(x) + cos(x)*cos(x); +foo(4.0); +foo(42.0); +extern putchard(x); +putchard(120); diff --git a/LLVM/cplusplus/tests/test5.kal b/LLVM/cplusplus/tests/test5.kal new file mode 100644 index 0000000..5faee3b --- /dev/null +++ b/LLVM/cplusplus/tests/test5.kal @@ -0,0 +1,26 @@ +# http://llvm.org/docs/tutorial/LangImpl5.html +extern foo(); +extern bar(); +def baz(x) if x then foo() else bar(); +# +def fib(x) + if x < 3 then + 1 + else + fib(x-1)+fib(x-2); +# +fib(1); +fib(2); +fib(3); +fib(4); +fib(10); +fib(20); +# +extern putchard(char) +def printstar(n) + for i = 1, i < n, 1.0 in + putchard(42); # ascii 42 = '*'; + +# print 100 '*' characters +printstar(100); + diff --git a/LLVM/cplusplus/tests/test6.kal b/LLVM/cplusplus/tests/test6.kal new file mode 100644 index 0000000..70b2d6d --- /dev/null +++ b/LLVM/cplusplus/tests/test6.kal @@ -0,0 +1,101 @@ +# Cf. http://llvm.org/docs/tutorial/LangImpl6.html#kicking-the-tires +# +extern printd(x); +def binary : 1 (x y) 0; # Low-precedence operator that ignores operands. +printd(123) : printd(456) : printd(789); + +# Logical unary not. +def unary!(v) + if v then + 0 + else + 1; + +# Unary negate. +def unary-(v) + 0-v; + +# Define > with the same precedence as <. +def binary> 10 (LHS RHS) + RHS < LHS; + +# Binary logical or, which does not short circuit. +def binary| 5 (LHS RHS) + if LHS then + 1 + else if RHS then + 1 + else + 0; + +# Binary logical and, which does not short circuit. +def binary& 6 (LHS RHS) + if !LHS then + 0 + else + !!RHS; + +# Define = with slightly lower precedence than relationals. +def binary = 9 (LHS RHS) + !(LHS < RHS | LHS > RHS); + +# Define ':' for sequencing: as a low-precedence operator that ignores operands +# and just returns the RHS. +def binary : 1 (x y) y; + + +extern putchard(char) +def printdensity(d) + if d > 8 then + putchard(32) # ' ' + else if d > 4 then + putchard(46) # '.' + else if d > 2 then + putchard(43) # '+' + else + putchard(42); # '*' + +printdensity(1): printdensity(2): printdensity(3): +printdensity(4): printdensity(5): printdensity(9): +putchard(10); + +# Determine whether the specific location diverges. +# Solve for z = z^2 + c in the complex plane. +def mandleconverger(real imag iters creal cimag) + if iters > 255 | (real*real + imag*imag > 4) then + iters + else + mandleconverger(real*real - imag*imag + creal, + 2*real*imag + cimag, + iters+1, creal, cimag); + +# Return the number of iterations required for the iteration to escape +def mandleconverge(real imag) + mandleconverger(real, imag, 0, real, imag); + +# Compute and plot the mandlebrot set with the specified 2 dimensional range +# info. +def mandelhelp(xmin xmax xstep ymin ymax ystep) + for y = ymin, y < ymax, ystep in ( + (for x = xmin, x < xmax, xstep in + printdensity(mandleconverge(x,y))) + : putchard(10) + ) + +# mandel - This is a convenient helper function for plotting the mandelbrot set +# from the specified position with the specified Magnification. +def mandel(realstart imagstart realmag imagmag) + mandelhelp(realstart, realstart+realmag*78, realmag, + imagstart, imagstart+imagmag*40, imagmag); + +mandel(-2.3, -1.3, 0.05, 0.07); + +mandel(-2, -1, 0.02, 0.04); + +mandel(-0.9, -1.4, 0.02, 0.03); + + + + + + diff --git a/LLVM/cplusplus/tests/test7.kal b/LLVM/cplusplus/tests/test7.kal new file mode 100644 index 0000000..cda8e39 --- /dev/null +++ b/LLVM/cplusplus/tests/test7.kal @@ -0,0 +1,22 @@ +# Define ':' for sequencing: as a low-precedence operator that ignores operands +# and just returns the RHS. +def binary : 1 (x y) y; + +# Recursive fib, we could do this before. +def fib(x) + if (x < 3) then + 1 + else + fib(x-1)+fib(x-2); + +# Iterative fib. +def fibi(x) + var a = 1, b = 1, c in + (for i = 3, i < x in + c = a + b : + a = b : + b = c) : + b; + +# Call it. +fibi(10); diff --git a/LLVM/cplusplus/tests/test8.cpp b/LLVM/cplusplus/tests/test8.cpp new file mode 100644 index 0000000..4f4539a --- /dev/null +++ b/LLVM/cplusplus/tests/test8.cpp @@ -0,0 +1,9 @@ +#include <iostream> + +extern "C" { + double average(double, double); +} + +int main() { + std::cout << "average of 40.0 and 44.0: " << average(40.0, 44.0) << std::endl; +} diff --git a/LLVM/cplusplus/tests/test8.kal b/LLVM/cplusplus/tests/test8.kal new file mode 100644 index 0000000..da902f4 --- /dev/null +++ b/LLVM/cplusplus/tests/test8.kal @@ -0,0 +1 @@ +def average(x y) (x + y) * 0.5; diff --git a/LLVM/cplusplus/tests/test9.kal b/LLVM/cplusplus/tests/test9.kal new file mode 100644 index 0000000..bff294a --- /dev/null +++ b/LLVM/cplusplus/tests/test9.kal @@ -0,0 +1,7 @@ +def fib(x) + if x < 3 then + 1 + else + fib(x-1)+fib(x-2) + +fib(10) diff --git a/LLVM/cplusplus/toy2.cpp b/LLVM/cplusplus/toy2.cpp new file mode 100644 index 0000000..59432fb --- /dev/null +++ b/LLVM/cplusplus/toy2.cpp @@ -0,0 +1,447 @@ +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <cctype> +#include <cstdio> +#include <cstdlib> +#include <map> +#include <memory> +#include <string> +#include <vector> + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +namespace { + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() = default; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(const std::string &Name) : Name(Name) {} +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr<ExprAST> LHS, RHS; + +public: + BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<std::unique_ptr<ExprAST>> Args; + +public: + CallExprAST(const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : Callee(Callee), Args(std::move(Args)) {} +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + +public: + PrototypeAST(const std::string &Name, std::vector<std::string> Args) + : Name(Name), Args(std::move(Args)) {} + + const std::string &getName() const { return Name; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; + +public: + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// LogError* - These are little helper functions for error handling. +std::unique_ptr<ExprAST> LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} +std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = std::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique<VariableExprAST>(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<std::unique_ptr<ExprAST>> Args; + if (CurTok != ')') { + while (true) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique<CallExprAST>(IdName, std::move(Args)); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static std::unique_ptr<ExprAST> ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { + // If this is a binop, find its precedence. + while (true) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), + std::move(RHS)); + } +} + +/// expression +/// ::= primary binoprhs +/// +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +static std::unique_ptr<PrototypeAST> ParsePrototype() { + if (CurTok != tok_identifier) + return LogErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames)); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr<FunctionAST> ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique<PrototypeAST>("__anon_expr", + std::vector<std::string>()); + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr<PrototypeAST> ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (ParseDefinition()) { + fprintf(stderr, "Parsed a function definition.\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (ParseExtern()) { + fprintf(stderr, "Parsed an extern\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (ParseTopLevelExpr()) { + fprintf(stderr, "Parsed a top-level expr\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (true) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/LLVM/cplusplus/toy3.cpp b/LLVM/cplusplus/toy3.cpp new file mode 100644 index 0000000..557780d --- /dev/null +++ b/LLVM/cplusplus/toy3.cpp @@ -0,0 +1,610 @@ +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include <algorithm> +#include <cctype> +#include <cstdio> +#include <cstdlib> +#include <map> +#include <memory> +#include <string> +#include <vector> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +namespace { + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() = default; + + virtual Value *codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} + + Value *codegen() override; +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(const std::string &Name) : Name(Name) {} + + Value *codegen() override; +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr<ExprAST> LHS, RHS; + +public: + BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + + Value *codegen() override; +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<std::unique_ptr<ExprAST>> Args; + +public: + CallExprAST(const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : Callee(Callee), Args(std::move(Args)) {} + + Value *codegen() override; +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + +public: + PrototypeAST(const std::string &Name, std::vector<std::string> Args) + : Name(Name), Args(std::move(Args)) {} + + Function *codegen(); + const std::string &getName() const { return Name; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; + +public: + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + + Function *codegen(); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// LogError* - These are little helper functions for error handling. +std::unique_ptr<ExprAST> LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} + +std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = std::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique<VariableExprAST>(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<std::unique_ptr<ExprAST>> Args; + if (CurTok != ')') { + while (true) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique<CallExprAST>(IdName, std::move(Args)); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static std::unique_ptr<ExprAST> ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { + // If this is a binop, find its precedence. + while (true) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = + std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); + } +} + +/// expression +/// ::= primary binoprhs +/// +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +static std::unique_ptr<PrototypeAST> ParsePrototype() { + if (CurTok != tok_identifier) + return LogErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames)); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr<FunctionAST> ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique<PrototypeAST>("__anon_expr", + std::vector<std::string>()); + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr<PrototypeAST> ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static LLVMContext TheContext; +static IRBuilder<> Builder(TheContext); +static std::unique_ptr<Module> TheModule; +static std::map<std::string, Value *> NamedValues; + +Value *LogErrorV(const char *Str) { + LogError(Str); + return nullptr; +} + +Value *NumberExprAST::codegen() { + return ConstantFP::get(TheContext, APFloat(Val)); +} + +Value *VariableExprAST::codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (!V) + return LogErrorV("Unknown variable name"); + return V; +} + +Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + + switch (Op) { + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); + default: + return LogErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (!CalleeF) + return LogErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return LogErrorV("Incorrect # arguments passed"); + + std::vector<Value *> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Function *PrototypeAST::codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // First, check for an existing function from a previous 'extern' declaration. + Function *TheFunction = TheModule->getFunction(Proto->getName()); + + if (!TheFunction) + TheFunction = Proto->codegen(); + + if (!TheFunction) + return nullptr; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[std::string(Arg.getName())] = &Arg; + + if (Value *RetVal = Body->codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { + fprintf(stderr, "Read function definition:"); + FnIR->print(errs()); + fprintf(stderr, "\n"); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { + fprintf(stderr, "Read extern: "); + FnIR->print(errs()); + fprintf(stderr, "\n"); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (auto FnAST = ParseTopLevelExpr()) { + if (auto *FnIR = FnAST->codegen()) { + fprintf(stderr, "Read top-level expression:"); + FnIR->print(errs()); + fprintf(stderr, "\n"); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (true) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = std::make_unique<Module>("my cool jit", TheContext); + + // Run the main "interpreter loop" now. + MainLoop(); + + // Print out all of the generated code. + TheModule->print(errs(), nullptr); + + return 0; +} diff --git a/LLVM/cplusplus/toy4.cpp b/LLVM/cplusplus/toy4.cpp new file mode 100644 index 0000000..623b5dc --- /dev/null +++ b/LLVM/cplusplus/toy4.cpp @@ -0,0 +1,701 @@ +// Next Line modified for the FbI/h_da.de version (only) +#include "KaleidoscopeJIT.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <map> +#include <memory> +#include <string> +#include <vector> + +using namespace llvm; +using namespace llvm::orc; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +namespace { + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() = default; + + virtual Value *codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} + + Value *codegen() override; +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(const std::string &Name) : Name(Name) {} + + Value *codegen() override; +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr<ExprAST> LHS, RHS; + +public: + BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + + Value *codegen() override; +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<std::unique_ptr<ExprAST>> Args; + +public: + CallExprAST(const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : Callee(Callee), Args(std::move(Args)) {} + + Value *codegen() override; +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + +public: + PrototypeAST(const std::string &Name, std::vector<std::string> Args) + : Name(Name), Args(std::move(Args)) {} + + Function *codegen(); + const std::string &getName() const { return Name; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; + +public: + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + + Function *codegen(); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// LogError* - These are little helper functions for error handling. +std::unique_ptr<ExprAST> LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} + +std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = std::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique<VariableExprAST>(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<std::unique_ptr<ExprAST>> Args; + if (CurTok != ')') { + while (true) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique<CallExprAST>(IdName, std::move(Args)); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static std::unique_ptr<ExprAST> ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { + // If this is a binop, find its precedence. + while (true) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = + std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); + } +} + +/// expression +/// ::= primary binoprhs +/// +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +static std::unique_ptr<PrototypeAST> ParsePrototype() { + if (CurTok != tok_identifier) + return LogErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames)); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr<FunctionAST> ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique<PrototypeAST>("__anon_expr", + std::vector<std::string>()); + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr<PrototypeAST> ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static LLVMContext TheContext; +static IRBuilder<> Builder(TheContext); +static std::unique_ptr<Module> TheModule; +static std::map<std::string, Value *> NamedValues; +static std::unique_ptr<legacy::FunctionPassManager> TheFPM; +static std::unique_ptr<KaleidoscopeJIT> TheJIT; +static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; + +Value *LogErrorV(const char *Str) { + LogError(Str); + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +Value *NumberExprAST::codegen() { + return ConstantFP::get(TheContext, APFloat(Val)); +} + +Value *VariableExprAST::codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (!V) + return LogErrorV("Unknown variable name"); + return V; +} + +Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + + switch (Op) { + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); + default: + return LogErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::codegen() { + // Look up the name in the global module table. + Function *CalleeF = getFunction(Callee); + if (!CalleeF) + return LogErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return LogErrorV("Incorrect # arguments passed"); + + std::vector<Value *> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Function *PrototypeAST::codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[std::string(Arg.getName())] = &Arg; + + if (Value *RetVal = Body->codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Run the optimizer on the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = std::make_unique<Module>("my cool jit", TheContext); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. + TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + TheFPM->add(createInstructionCombiningPass()); + // Reassociate expressions. + TheFPM->add(createReassociatePass()); + // Eliminate Common SubExpressions. + TheFPM->add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + TheFPM->add(createCFGSimplificationPass()); + + TheFPM->doInitialization(); +} + +static void HandleDefinition() { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { + fprintf(stderr, "Read function definition:"); + FnIR->print(errs()); + fprintf(stderr, "\n"); + TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { + fprintf(stderr, "Read extern: "); + FnIR->print(errs()); + fprintf(stderr, "\n"); + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (auto FnAST = ParseTopLevelExpr()) { + if (FnAST->codegen()) { + // JIT the module containing the anonymous expression, keeping a handle so + // we can free it later. + auto H = TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + + // Search the JIT for the __anon_expr symbol. + auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); + assert(ExprSymbol && "Function not found"); + + // Get the symbol's address and cast it to the right type (takes no + // arguments, returns a double) so we can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); + fprintf(stderr, "Evaluated to %f\n", FP()); + + // Delete the anonymous expression module from the JIT. + TheJIT->removeModule(H); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (true) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + +/// putchard - putchar that takes a double and returns 0. +extern "C" DLLEXPORT double putchard(double X) { + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" DLLEXPORT double printd(double X) { + fprintf(stderr, "%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + TheJIT = std::make_unique<KaleidoscopeJIT>(); + + InitializeModuleAndPassManager(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/LLVM/cplusplus/toy5.cpp b/LLVM/cplusplus/toy5.cpp new file mode 100644 index 0000000..7264b80 --- /dev/null +++ b/LLVM/cplusplus/toy5.cpp @@ -0,0 +1,975 @@ +// Next Line modified for the FbI/h_da.de version (only) +#include "KaleidoscopeJIT.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <map> +#include <memory> +#include <string> +#include <vector> + +using namespace llvm; +using namespace llvm::orc; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5, + + // control + tok_if = -6, + tok_then = -7, + tok_else = -8, + tok_for = -9, + tok_in = -10 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + if (IdentifierStr == "if") + return tok_if; + if (IdentifierStr == "then") + return tok_then; + if (IdentifierStr == "else") + return tok_else; + if (IdentifierStr == "for") + return tok_for; + if (IdentifierStr == "in") + return tok_in; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +namespace { + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() = default; + + virtual Value *codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} + + Value *codegen() override; +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(const std::string &Name) : Name(Name) {} + + Value *codegen() override; +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr<ExprAST> LHS, RHS; + +public: + BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + + Value *codegen() override; +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<std::unique_ptr<ExprAST>> Args; + +public: + CallExprAST(const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : Callee(Callee), Args(std::move(Args)) {} + + Value *codegen() override; +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + std::unique_ptr<ExprAST> Cond, Then, Else; + +public: + IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, + std::unique_ptr<ExprAST> Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + + Value *codegen() override; +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + std::unique_ptr<ExprAST> Start, End, Step, Body; + +public: + ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, + std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, + std::unique_ptr<ExprAST> Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + +public: + PrototypeAST(const std::string &Name, std::vector<std::string> Args) + : Name(Name), Args(std::move(Args)) {} + + Function *codegen(); + const std::string &getName() const { return Name; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; + +public: + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + + Function *codegen(); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// LogError* - These are little helper functions for error handling. +std::unique_ptr<ExprAST> LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} + +std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = std::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique<VariableExprAST>(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<std::unique_ptr<ExprAST>> Args; + if (CurTok != ')') { + while (true) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique<CallExprAST>(IdName, std::move(Args)); +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static std::unique_ptr<ExprAST> ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + auto Cond = ParseExpression(); + if (!Cond) + return nullptr; + + if (CurTok != tok_then) + return LogError("expected then"); + getNextToken(); // eat the then + + auto Then = ParseExpression(); + if (!Then) + return nullptr; + + if (CurTok != tok_else) + return LogError("expected else"); + + getNextToken(); + + auto Else = ParseExpression(); + if (!Else) + return nullptr; + + return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then), + std::move(Else)); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static std::unique_ptr<ExprAST> ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return LogError("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return LogError("expected '=' after for"); + getNextToken(); // eat '='. + + auto Start = ParseExpression(); + if (!Start) + return nullptr; + if (CurTok != ',') + return LogError("expected ',' after for start value"); + getNextToken(); + + auto End = ParseExpression(); + if (!End) + return nullptr; + + // The step value is optional. + std::unique_ptr<ExprAST> Step; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (!Step) + return nullptr; + } + + if (CurTok != tok_in) + return LogError("expected 'in' after for"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +static std::unique_ptr<ExprAST> ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + case tok_if: + return ParseIfExpr(); + case tok_for: + return ParseForExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { + // If this is a binop, find its precedence. + while (true) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = + std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); + } +} + +/// expression +/// ::= primary binoprhs +/// +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +static std::unique_ptr<PrototypeAST> ParsePrototype() { + if (CurTok != tok_identifier) + return LogErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames)); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr<FunctionAST> ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique<PrototypeAST>("__anon_expr", + std::vector<std::string>()); + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr<PrototypeAST> ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static LLVMContext TheContext; +static IRBuilder<> Builder(TheContext); +static std::unique_ptr<Module> TheModule; +static std::map<std::string, Value *> NamedValues; +static std::unique_ptr<legacy::FunctionPassManager> TheFPM; +static std::unique_ptr<KaleidoscopeJIT> TheJIT; +static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; + +Value *LogErrorV(const char *Str) { + LogError(Str); + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +Value *NumberExprAST::codegen() { + return ConstantFP::get(TheContext, APFloat(Val)); +} + +Value *VariableExprAST::codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (!V) + return LogErrorV("Unknown variable name"); + return V; +} + +Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + + switch (Op) { + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); + default: + return LogErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::codegen() { + // Look up the name in the global module table. + Function *CalleeF = getFunction(Callee); + if (!CalleeF) + return LogErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return LogErrorV("Incorrect # arguments passed"); + + std::vector<Value *> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::codegen() { + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + CondV = Builder.CreateFCmpONE( + CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); + BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +// Output for-loop as: +// ... +// start = startexpr +// goto loop +// loop: +// variable = phi [start, loopheader], [nextvariable, loopend] +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// nextvariable = variable + step +// endcond = endexpr +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = + Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, VarName); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (!Body->codegen()) + return nullptr; + + // Emit the step value. + Value *StepVal = nullptr; + if (Step) { + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(TheContext, APFloat(1.0)); + } + + Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + EndCond = Builder.CreateFCmpONE( + EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = + BasicBlock::Create(TheContext, "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(TheContext)); +} + +Function *PrototypeAST::codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[std::string(Arg.getName())] = &Arg; + + if (Value *RetVal = Body->codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Run the optimizer on the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = std::make_unique<Module>("my cool jit", TheContext); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. + TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + TheFPM->add(createInstructionCombiningPass()); + // Reassociate expressions. + TheFPM->add(createReassociatePass()); + // Eliminate Common SubExpressions. + TheFPM->add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + TheFPM->add(createCFGSimplificationPass()); + + TheFPM->doInitialization(); +} + +static void HandleDefinition() { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { + fprintf(stderr, "Read function definition:"); + FnIR->print(errs()); + fprintf(stderr, "\n"); + TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { + fprintf(stderr, "Read extern: "); + FnIR->print(errs()); + fprintf(stderr, "\n"); + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (auto FnAST = ParseTopLevelExpr()) { + if (FnAST->codegen()) { + // JIT the module containing the anonymous expression, keeping a handle so + // we can free it later. + auto H = TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + + // Search the JIT for the __anon_expr symbol. + auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); + assert(ExprSymbol && "Function not found"); + + // Get the symbol's address and cast it to the right type (takes no + // arguments, returns a double) so we can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); + fprintf(stderr, "Evaluated to %f\n", FP()); + + // Delete the anonymous expression module from the JIT. + TheJIT->removeModule(H); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (true) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + +/// putchard - putchar that takes a double and returns 0. +extern "C" DLLEXPORT double putchard(double X) { + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" DLLEXPORT double printd(double X) { + fprintf(stderr, "%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + TheJIT = std::make_unique<KaleidoscopeJIT>(); + + InitializeModuleAndPassManager(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/LLVM/cplusplus/toy6.cpp b/LLVM/cplusplus/toy6.cpp new file mode 100644 index 0000000..ec08671 --- /dev/null +++ b/LLVM/cplusplus/toy6.cpp @@ -0,0 +1,1094 @@ +// Next Line modified for the FbI/h_da.de version (only) +#include "KaleidoscopeJIT.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <map> +#include <memory> +#include <string> +#include <vector> + +using namespace llvm; +using namespace llvm::orc; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5, + + // control + tok_if = -6, + tok_then = -7, + tok_else = -8, + tok_for = -9, + tok_in = -10, + + // operators + tok_binary = -11, + tok_unary = -12 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + if (IdentifierStr == "if") + return tok_if; + if (IdentifierStr == "then") + return tok_then; + if (IdentifierStr == "else") + return tok_else; + if (IdentifierStr == "for") + return tok_for; + if (IdentifierStr == "in") + return tok_in; + if (IdentifierStr == "binary") + return tok_binary; + if (IdentifierStr == "unary") + return tok_unary; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +namespace { + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() = default; + + virtual Value *codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} + + Value *codegen() override; +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(const std::string &Name) : Name(Name) {} + + Value *codegen() override; +}; + +/// UnaryExprAST - Expression class for a unary operator. +class UnaryExprAST : public ExprAST { + char Opcode; + std::unique_ptr<ExprAST> Operand; + +public: + UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) + : Opcode(Opcode), Operand(std::move(Operand)) {} + + Value *codegen() override; +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr<ExprAST> LHS, RHS; + +public: + BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + + Value *codegen() override; +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<std::unique_ptr<ExprAST>> Args; + +public: + CallExprAST(const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : Callee(Callee), Args(std::move(Args)) {} + + Value *codegen() override; +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + std::unique_ptr<ExprAST> Cond, Then, Else; + +public: + IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, + std::unique_ptr<ExprAST> Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + + Value *codegen() override; +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + std::unique_ptr<ExprAST> Start, End, Step, Body; + +public: + ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, + std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, + std::unique_ptr<ExprAST> Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool IsOperator; + unsigned Precedence; // Precedence if a binary op. + +public: + PrototypeAST(const std::string &Name, std::vector<std::string> Args, + bool IsOperator = false, unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec) {} + + Function *codegen(); + const std::string &getName() const { return Name; } + + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size() - 1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; + +public: + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + + Function *codegen(); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +std::unique_ptr<ExprAST> LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} + +std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = std::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique<VariableExprAST>(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<std::unique_ptr<ExprAST>> Args; + if (CurTok != ')') { + while (true) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique<CallExprAST>(IdName, std::move(Args)); +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static std::unique_ptr<ExprAST> ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + auto Cond = ParseExpression(); + if (!Cond) + return nullptr; + + if (CurTok != tok_then) + return LogError("expected then"); + getNextToken(); // eat the then + + auto Then = ParseExpression(); + if (!Then) + return nullptr; + + if (CurTok != tok_else) + return LogError("expected else"); + + getNextToken(); + + auto Else = ParseExpression(); + if (!Else) + return nullptr; + + return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then), + std::move(Else)); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static std::unique_ptr<ExprAST> ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return LogError("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return LogError("expected '=' after for"); + getNextToken(); // eat '='. + + auto Start = ParseExpression(); + if (!Start) + return nullptr; + if (CurTok != ',') + return LogError("expected ',' after for start value"); + getNextToken(); + + auto End = ParseExpression(); + if (!End) + return nullptr; + + // The step value is optional. + std::unique_ptr<ExprAST> Step; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (!Step) + return nullptr; + } + + if (CurTok != tok_in) + return LogError("expected 'in' after for"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +static std::unique_ptr<ExprAST> ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + case tok_if: + return ParseIfExpr(); + case tok_for: + return ParseForExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static std::unique_ptr<ExprAST> ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (auto Operand = ParseUnary()) + return std::make_unique<UnaryExprAST>(Opc, std::move(Operand)); + return nullptr; +} + +/// binoprhs +/// ::= ('+' unary)* +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { + // If this is a binop, find its precedence. + while (true) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + auto RHS = ParseUnary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = + std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); + } +} + +/// expression +/// ::= unary binoprhs +/// +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParseUnary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static std::unique_ptr<PrototypeAST> ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return LogErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return LogErrorP("Invalid precedence: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return LogErrorP("Invalid number of operands for operator"); + + return std::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, + BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr<FunctionAST> ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique<PrototypeAST>("__anon_expr", + std::vector<std::string>()); + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr<PrototypeAST> ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static LLVMContext TheContext; +static IRBuilder<> Builder(TheContext); +static std::unique_ptr<Module> TheModule; +static std::map<std::string, Value *> NamedValues; +static std::unique_ptr<legacy::FunctionPassManager> TheFPM; +static std::unique_ptr<KaleidoscopeJIT> TheJIT; +static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; + +Value *LogErrorV(const char *Str) { + LogError(Str); + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +Value *NumberExprAST::codegen() { + return ConstantFP::get(TheContext, APFloat(Val)); +} + +Value *VariableExprAST::codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (!V) + return LogErrorV("Unknown variable name"); + return V; +} + +Value *UnaryExprAST::codegen() { + Value *OperandV = Operand->codegen(); + if (!OperandV) + return nullptr; + + Function *F = getFunction(std::string("unary") + Opcode); + if (!F) + return LogErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + + switch (Op) { + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); + default: + break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = getFunction(std::string("binary") + Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = {L, R}; + return Builder.CreateCall(F, Ops, "binop"); +} + +Value *CallExprAST::codegen() { + // Look up the name in the global module table. + Function *CalleeF = getFunction(Callee); + if (!CalleeF) + return LogErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return LogErrorV("Incorrect # arguments passed"); + + std::vector<Value *> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::codegen() { + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + CondV = Builder.CreateFCmpONE( + CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); + BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +// Output for-loop as: +// ... +// start = startexpr +// goto loop +// loop: +// variable = phi [start, loopheader], [nextvariable, loopend] +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// nextvariable = variable + step +// endcond = endexpr +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = + Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, VarName); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (!Body->codegen()) + return nullptr; + + // Emit the step value. + Value *StepVal = nullptr; + if (Step) { + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(TheContext, APFloat(1.0)); + } + + Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + EndCond = Builder.CreateFCmpONE( + EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = + BasicBlock::Create(TheContext, "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(TheContext)); +} + +Function *PrototypeAST::codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // If this is an operator, install it. + if (P.isBinaryOp()) + BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[std::string(Arg.getName())] = &Arg; + + if (Value *RetVal = Body->codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Run the optimizer on the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (P.isBinaryOp()) + BinopPrecedence.erase(P.getOperatorName()); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = std::make_unique<Module>("my cool jit", TheContext); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. + TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + TheFPM->add(createInstructionCombiningPass()); + // Reassociate expressions. + TheFPM->add(createReassociatePass()); + // Eliminate Common SubExpressions. + TheFPM->add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + TheFPM->add(createCFGSimplificationPass()); + + TheFPM->doInitialization(); +} + +static void HandleDefinition() { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { + fprintf(stderr, "Read function definition:"); + FnIR->print(errs()); + fprintf(stderr, "\n"); + TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { + fprintf(stderr, "Read extern: "); + FnIR->print(errs()); + fprintf(stderr, "\n"); + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (auto FnAST = ParseTopLevelExpr()) { + if (FnAST->codegen()) { + // JIT the module containing the anonymous expression, keeping a handle so + // we can free it later. + auto H = TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + + // Search the JIT for the __anon_expr symbol. + auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); + assert(ExprSymbol && "Function not found"); + + // Get the symbol's address and cast it to the right type (takes no + // arguments, returns a double) so we can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); + fprintf(stderr, "Evaluated to %f\n", FP()); + + // Delete the anonymous expression module from the JIT. + TheJIT->removeModule(H); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (true) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + +/// putchard - putchar that takes a double and returns 0. +extern "C" DLLEXPORT double putchard(double X) { + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" DLLEXPORT double printd(double X) { + fprintf(stderr, "%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + TheJIT = std::make_unique<KaleidoscopeJIT>(); + + InitializeModuleAndPassManager(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/LLVM/cplusplus/toy7.cpp b/LLVM/cplusplus/toy7.cpp new file mode 100644 index 0000000..f6a3494 --- /dev/null +++ b/LLVM/cplusplus/toy7.cpp @@ -0,0 +1,1265 @@ +// Next Line modified for the FbI/h_da.de version (only) +#include "KaleidoscopeJIT.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Utils.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; +using namespace llvm::orc; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5, + + // control + tok_if = -6, + tok_then = -7, + tok_else = -8, + tok_for = -9, + tok_in = -10, + + // operators + tok_binary = -11, + tok_unary = -12, + + // var definition + tok_var = -13 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + if (IdentifierStr == "if") + return tok_if; + if (IdentifierStr == "then") + return tok_then; + if (IdentifierStr == "else") + return tok_else; + if (IdentifierStr == "for") + return tok_for; + if (IdentifierStr == "in") + return tok_in; + if (IdentifierStr == "binary") + return tok_binary; + if (IdentifierStr == "unary") + return tok_unary; + if (IdentifierStr == "var") + return tok_var; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +namespace { + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() = default; + + virtual Value *codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} + + Value *codegen() override; +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(const std::string &Name) : Name(Name) {} + + Value *codegen() override; + const std::string &getName() const { return Name; } +}; + +/// UnaryExprAST - Expression class for a unary operator. +class UnaryExprAST : public ExprAST { + char Opcode; + std::unique_ptr<ExprAST> Operand; + +public: + UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) + : Opcode(Opcode), Operand(std::move(Operand)) {} + + Value *codegen() override; +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr<ExprAST> LHS, RHS; + +public: + BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + + Value *codegen() override; +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<std::unique_ptr<ExprAST>> Args; + +public: + CallExprAST(const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : Callee(Callee), Args(std::move(Args)) {} + + Value *codegen() override; +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + std::unique_ptr<ExprAST> Cond, Then, Else; + +public: + IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, + std::unique_ptr<ExprAST> Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + + Value *codegen() override; +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + std::unique_ptr<ExprAST> Start, End, Step, Body; + +public: + ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, + std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, + std::unique_ptr<ExprAST> Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// VarExprAST - Expression class for var/in +class VarExprAST : public ExprAST { + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; + std::unique_ptr<ExprAST> Body; + +public: + VarExprAST( + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, + std::unique_ptr<ExprAST> Body) + : VarNames(std::move(VarNames)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool IsOperator; + unsigned Precedence; // Precedence if a binary op. + +public: + PrototypeAST(const std::string &Name, std::vector<std::string> Args, + bool IsOperator = false, unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec) {} + + Function *codegen(); + const std::string &getName() const { return Name; } + + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size() - 1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; + +public: + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + + Function *codegen(); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// LogError* - These are little helper functions for error handling. +std::unique_ptr<ExprAST> LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} + +std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = std::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique<VariableExprAST>(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<std::unique_ptr<ExprAST>> Args; + if (CurTok != ')') { + while (true) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique<CallExprAST>(IdName, std::move(Args)); +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static std::unique_ptr<ExprAST> ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + auto Cond = ParseExpression(); + if (!Cond) + return nullptr; + + if (CurTok != tok_then) + return LogError("expected then"); + getNextToken(); // eat the then + + auto Then = ParseExpression(); + if (!Then) + return nullptr; + + if (CurTok != tok_else) + return LogError("expected else"); + + getNextToken(); + + auto Else = ParseExpression(); + if (!Else) + return nullptr; + + return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then), + std::move(Else)); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static std::unique_ptr<ExprAST> ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return LogError("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return LogError("expected '=' after for"); + getNextToken(); // eat '='. + + auto Start = ParseExpression(); + if (!Start) + return nullptr; + if (CurTok != ',') + return LogError("expected ',' after for start value"); + getNextToken(); + + auto End = ParseExpression(); + if (!End) + return nullptr; + + // The step value is optional. + std::unique_ptr<ExprAST> Step; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (!Step) + return nullptr; + } + + if (CurTok != tok_in) + return LogError("expected 'in' after for"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); +} + +/// varexpr ::= 'var' identifier ('=' expression)? +// (',' identifier ('=' expression)?)* 'in' expression +static std::unique_ptr<ExprAST> ParseVarExpr() { + getNextToken(); // eat the var. + + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; + + // At least one variable name is required. + if (CurTok != tok_identifier) + return LogError("expected identifier after var"); + + while (true) { + std::string Name = IdentifierStr; + getNextToken(); // eat identifier. + + // Read the optional initializer. + std::unique_ptr<ExprAST> Init = nullptr; + if (CurTok == '=') { + getNextToken(); // eat the '='. + + Init = ParseExpression(); + if (!Init) + return nullptr; + } + + VarNames.push_back(std::make_pair(Name, std::move(Init))); + + // End of var list, exit loop. + if (CurTok != ',') + break; + getNextToken(); // eat the ','. + + if (CurTok != tok_identifier) + return LogError("expected identifier list after var"); + } + + // At this point, we have to have 'in'. + if (CurTok != tok_in) + return LogError("expected 'in' keyword after 'var'"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +/// ::= varexpr +static std::unique_ptr<ExprAST> ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + case tok_if: + return ParseIfExpr(); + case tok_for: + return ParseForExpr(); + case tok_var: + return ParseVarExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static std::unique_ptr<ExprAST> ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (auto Operand = ParseUnary()) + return std::make_unique<UnaryExprAST>(Opc, std::move(Operand)); + return nullptr; +} + +/// binoprhs +/// ::= ('+' unary)* +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { + // If this is a binop, find its precedence. + while (true) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + auto RHS = ParseUnary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = + std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); + } +} + +/// expression +/// ::= unary binoprhs +/// +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParseUnary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static std::unique_ptr<PrototypeAST> ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return LogErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return LogErrorP("Invalid precedence: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return LogErrorP("Invalid number of operands for operator"); + + return std::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, + BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr<FunctionAST> ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique<PrototypeAST>("__anon_expr", + std::vector<std::string>()); + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr<PrototypeAST> ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static LLVMContext TheContext; +static IRBuilder<> Builder(TheContext); +static std::unique_ptr<Module> TheModule; +static std::map<std::string, AllocaInst *> NamedValues; +static std::unique_ptr<legacy::FunctionPassManager> TheFPM; +static std::unique_ptr<KaleidoscopeJIT> TheJIT; +static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; + +Value *LogErrorV(const char *Str) { + LogError(Str); + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of +/// the function. This is used for mutable variables etc. +static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, + StringRef VarName) { + IRBuilder<> TmpB(&TheFunction->getEntryBlock(), + TheFunction->getEntryBlock().begin()); + return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName); +} + +Value *NumberExprAST::codegen() { + return ConstantFP::get(TheContext, APFloat(Val)); +} + +Value *VariableExprAST::codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (!V) + return LogErrorV("Unknown variable name"); + + // Load the value. + return Builder.CreateLoad(V, Name.c_str()); +} + +Value *UnaryExprAST::codegen() { + Value *OperandV = Operand->codegen(); + if (!OperandV) + return nullptr; + + Function *F = getFunction(std::string("unary") + Opcode); + if (!F) + return LogErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::codegen() { + // Special case '=' because we don't want to emit the LHS as an expression. + if (Op == '=') { + // Assignment requires the LHS to be an identifier. + // This assume we're building without RTTI because LLVM builds that way by + // default. If you build LLVM with RTTI this can be changed to a + // dynamic_cast for automatic error checking. + VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); + if (!LHSE) + return LogErrorV("destination of '=' must be a variable"); + // Codegen the RHS. + Value *Val = RHS->codegen(); + if (!Val) + return nullptr; + + // Look up the name. + Value *Variable = NamedValues[LHSE->getName()]; + if (!Variable) + return LogErrorV("Unknown variable name"); + + Builder.CreateStore(Val, Variable); + return Val; + } + + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + + switch (Op) { + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); + default: + break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = getFunction(std::string("binary") + Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = {L, R}; + return Builder.CreateCall(F, Ops, "binop"); +} + +Value *CallExprAST::codegen() { + // Look up the name in the global module table. + Function *CalleeF = getFunction(Callee); + if (!CalleeF) + return LogErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return LogErrorV("Incorrect # arguments passed"); + + std::vector<Value *> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::codegen() { + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + CondV = Builder.CreateFCmpONE( + CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); + BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +// Output for-loop as: +// var = alloca double +// ... +// start = startexpr +// store start -> var +// goto loop +// loop: +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// endcond = endexpr +// +// curvar = load var +// nextvar = curvar + step +// store nextvar -> var +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + AllocaInst *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (!Body->codegen()) + return nullptr; + + // Emit the step value. + Value *StepVal = nullptr; + if (Step) { + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(TheContext, APFloat(1.0)); + } + + // Compute the end condition. + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. + Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + + // Convert condition to a bool by comparing non-equal to 0.0. + EndCond = Builder.CreateFCmpONE( + EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *AfterBB = + BasicBlock::Create(TheContext, "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(TheContext)); +} + +Value *VarExprAST::codegen() { + std::vector<AllocaInst *> OldBindings; + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Register all variables and emit their initializer. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { + const std::string &VarName = VarNames[i].first; + ExprAST *Init = VarNames[i].second.get(); + + // Emit the initializer before adding the variable to scope, this prevents + // the initializer from referencing the variable itself, and permits stuff + // like this: + // var a = 1 in + // var a = a in ... # refers to outer 'a'. + Value *InitVal; + if (Init) { + InitVal = Init->codegen(); + if (!InitVal) + return nullptr; + } else { // If not specified, use 0.0. + InitVal = ConstantFP::get(TheContext, APFloat(0.0)); + } + + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + Builder.CreateStore(InitVal, Alloca); + + // Remember the old variable binding so that we can restore the binding when + // we unrecurse. + OldBindings.push_back(NamedValues[VarName]); + + // Remember this binding. + NamedValues[VarName] = Alloca; + } + + // Codegen the body, now that all vars are in scope. + Value *BodyVal = Body->codegen(); + if (!BodyVal) + return nullptr; + + // Pop all our variables from scope. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) + NamedValues[VarNames[i].first] = OldBindings[i]; + + // Return the body computation. + return BodyVal; +} + +Function *PrototypeAST::codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // If this is an operator, install it. + if (P.isBinaryOp()) + BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); + + // Store the initial value into the alloca. + Builder.CreateStore(&Arg, Alloca); + + // Add arguments to variable symbol table. + NamedValues[std::string(Arg.getName())] = Alloca; + } + + if (Value *RetVal = Body->codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Run the optimizer on the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (P.isBinaryOp()) + BinopPrecedence.erase(P.getOperatorName()); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = std::make_unique<Module>("my cool jit", TheContext); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. + TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get()); + + // Promote allocas to registers. + TheFPM->add(createPromoteMemoryToRegisterPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + TheFPM->add(createInstructionCombiningPass()); + // Reassociate expressions. + TheFPM->add(createReassociatePass()); + // Eliminate Common SubExpressions. + TheFPM->add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + TheFPM->add(createCFGSimplificationPass()); + + TheFPM->doInitialization(); +} + +static void HandleDefinition() { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { + fprintf(stderr, "Read function definition:"); + FnIR->print(errs()); + fprintf(stderr, "\n"); + TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { + fprintf(stderr, "Read extern: "); + FnIR->print(errs()); + fprintf(stderr, "\n"); + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (auto FnAST = ParseTopLevelExpr()) { + if (FnAST->codegen()) { + // JIT the module containing the anonymous expression, keeping a handle so + // we can free it later. + auto H = TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + + // Search the JIT for the __anon_expr symbol. + auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); + assert(ExprSymbol && "Function not found"); + + // Get the symbol's address and cast it to the right type (takes no + // arguments, returns a double) so we can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); + fprintf(stderr, "Evaluated to %f\n", FP()); + + // Delete the anonymous expression module from the JIT. + TheJIT->removeModule(H); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (true) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + +/// putchard - putchar that takes a double and returns 0. +extern "C" DLLEXPORT double putchard(double X) { + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" DLLEXPORT double printd(double X) { + fprintf(stderr, "%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['='] = 2; + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + TheJIT = std::make_unique<KaleidoscopeJIT>(); + + InitializeModuleAndPassManager(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/LLVM/cplusplus/toy8.cpp b/LLVM/cplusplus/toy8.cpp new file mode 100644 index 0000000..b11cea4 --- /dev/null +++ b/LLVM/cplusplus/toy8.cpp @@ -0,0 +1,1270 @@ +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstdio> +#include <cstdlib> +#include <map> +#include <memory> +#include <string> +#include <system_error> +#include <utility> +#include <vector> + +using namespace llvm; +using namespace llvm::sys; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5, + + // control + tok_if = -6, + tok_then = -7, + tok_else = -8, + tok_for = -9, + tok_in = -10, + + // operators + tok_binary = -11, + tok_unary = -12, + + // var definition + tok_var = -13 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + if (IdentifierStr == "if") + return tok_if; + if (IdentifierStr == "then") + return tok_then; + if (IdentifierStr == "else") + return tok_else; + if (IdentifierStr == "for") + return tok_for; + if (IdentifierStr == "in") + return tok_in; + if (IdentifierStr == "binary") + return tok_binary; + if (IdentifierStr == "unary") + return tok_unary; + if (IdentifierStr == "var") + return tok_var; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +namespace { + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() = default; + + virtual Value *codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} + + Value *codegen() override; +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(const std::string &Name) : Name(Name) {} + + Value *codegen() override; + const std::string &getName() const { return Name; } +}; + +/// UnaryExprAST - Expression class for a unary operator. +class UnaryExprAST : public ExprAST { + char Opcode; + std::unique_ptr<ExprAST> Operand; + +public: + UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) + : Opcode(Opcode), Operand(std::move(Operand)) {} + + Value *codegen() override; +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr<ExprAST> LHS, RHS; + +public: + BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + + Value *codegen() override; +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<std::unique_ptr<ExprAST>> Args; + +public: + CallExprAST(const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : Callee(Callee), Args(std::move(Args)) {} + + Value *codegen() override; +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + std::unique_ptr<ExprAST> Cond, Then, Else; + +public: + IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then, + std::unique_ptr<ExprAST> Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + + Value *codegen() override; +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + std::unique_ptr<ExprAST> Start, End, Step, Body; + +public: + ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, + std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, + std::unique_ptr<ExprAST> Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// VarExprAST - Expression class for var/in +class VarExprAST : public ExprAST { + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; + std::unique_ptr<ExprAST> Body; + +public: + VarExprAST( + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, + std::unique_ptr<ExprAST> Body) + : VarNames(std::move(VarNames)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool IsOperator; + unsigned Precedence; // Precedence if a binary op. + +public: + PrototypeAST(const std::string &Name, std::vector<std::string> Args, + bool IsOperator = false, unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec) {} + + Function *codegen(); + const std::string &getName() const { return Name; } + + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size() - 1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; + +public: + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + + Function *codegen(); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// LogError* - These are little helper functions for error handling. +std::unique_ptr<ExprAST> LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} + +std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = std::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique<VariableExprAST>(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<std::unique_ptr<ExprAST>> Args; + if (CurTok != ')') { + while (true) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique<CallExprAST>(IdName, std::move(Args)); +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static std::unique_ptr<ExprAST> ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + auto Cond = ParseExpression(); + if (!Cond) + return nullptr; + + if (CurTok != tok_then) + return LogError("expected then"); + getNextToken(); // eat the then + + auto Then = ParseExpression(); + if (!Then) + return nullptr; + + if (CurTok != tok_else) + return LogError("expected else"); + + getNextToken(); + + auto Else = ParseExpression(); + if (!Else) + return nullptr; + + return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then), + std::move(Else)); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static std::unique_ptr<ExprAST> ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return LogError("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return LogError("expected '=' after for"); + getNextToken(); // eat '='. + + auto Start = ParseExpression(); + if (!Start) + return nullptr; + if (CurTok != ',') + return LogError("expected ',' after for start value"); + getNextToken(); + + auto End = ParseExpression(); + if (!End) + return nullptr; + + // The step value is optional. + std::unique_ptr<ExprAST> Step; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (!Step) + return nullptr; + } + + if (CurTok != tok_in) + return LogError("expected 'in' after for"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); +} + +/// varexpr ::= 'var' identifier ('=' expression)? +// (',' identifier ('=' expression)?)* 'in' expression +static std::unique_ptr<ExprAST> ParseVarExpr() { + getNextToken(); // eat the var. + + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; + + // At least one variable name is required. + if (CurTok != tok_identifier) + return LogError("expected identifier after var"); + + while (true) { + std::string Name = IdentifierStr; + getNextToken(); // eat identifier. + + // Read the optional initializer. + std::unique_ptr<ExprAST> Init = nullptr; + if (CurTok == '=') { + getNextToken(); // eat the '='. + + Init = ParseExpression(); + if (!Init) + return nullptr; + } + + VarNames.push_back(std::make_pair(Name, std::move(Init))); + + // End of var list, exit loop. + if (CurTok != ',') + break; + getNextToken(); // eat the ','. + + if (CurTok != tok_identifier) + return LogError("expected identifier list after var"); + } + + // At this point, we have to have 'in'. + if (CurTok != tok_in) + return LogError("expected 'in' keyword after 'var'"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +/// ::= varexpr +static std::unique_ptr<ExprAST> ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + case tok_if: + return ParseIfExpr(); + case tok_for: + return ParseForExpr(); + case tok_var: + return ParseVarExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static std::unique_ptr<ExprAST> ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (auto Operand = ParseUnary()) + return std::make_unique<UnaryExprAST>(Opc, std::move(Operand)); + return nullptr; +} + +/// binoprhs +/// ::= ('+' unary)* +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { + // If this is a binop, find its precedence. + while (true) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + auto RHS = ParseUnary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = + std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); + } +} + +/// expression +/// ::= unary binoprhs +/// +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParseUnary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static std::unique_ptr<PrototypeAST> ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return LogErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return LogErrorP("Invalid precedence: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return LogErrorP("Invalid number of operands for operator"); + + return std::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0, + BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr<FunctionAST> ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique<PrototypeAST>("__anon_expr", + std::vector<std::string>()); + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr<PrototypeAST> ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static LLVMContext TheContext; +static IRBuilder<> Builder(TheContext); +static std::unique_ptr<Module> TheModule; +static std::map<std::string, AllocaInst *> NamedValues; +static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; + +Value *LogErrorV(const char *Str) { + LogError(Str); + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of +/// the function. This is used for mutable variables etc. +static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, + StringRef VarName) { + IRBuilder<> TmpB(&TheFunction->getEntryBlock(), + TheFunction->getEntryBlock().begin()); + return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName); +} + +Value *NumberExprAST::codegen() { + return ConstantFP::get(TheContext, APFloat(Val)); +} + +Value *VariableExprAST::codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (!V) + return LogErrorV("Unknown variable name"); + + // Load the value. + return Builder.CreateLoad(V, Name.c_str()); +} + +Value *UnaryExprAST::codegen() { + Value *OperandV = Operand->codegen(); + if (!OperandV) + return nullptr; + + Function *F = getFunction(std::string("unary") + Opcode); + if (!F) + return LogErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::codegen() { + // Special case '=' because we don't want to emit the LHS as an expression. + if (Op == '=') { + // Assignment requires the LHS to be an identifier. + // This assume we're building without RTTI because LLVM builds that way by + // default. If you build LLVM with RTTI this can be changed to a + // dynamic_cast for automatic error checking. + VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); + if (!LHSE) + return LogErrorV("destination of '=' must be a variable"); + // Codegen the RHS. + Value *Val = RHS->codegen(); + if (!Val) + return nullptr; + + // Look up the name. + Value *Variable = NamedValues[LHSE->getName()]; + if (!Variable) + return LogErrorV("Unknown variable name"); + + Builder.CreateStore(Val, Variable); + return Val; + } + + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + + switch (Op) { + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); + default: + break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = getFunction(std::string("binary") + Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = {L, R}; + return Builder.CreateCall(F, Ops, "binop"); +} + +Value *CallExprAST::codegen() { + // Look up the name in the global module table. + Function *CalleeF = getFunction(Callee); + if (!CalleeF) + return LogErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return LogErrorV("Incorrect # arguments passed"); + + std::vector<Value *> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::codegen() { + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + CondV = Builder.CreateFCmpONE( + CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); + BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +// Output for-loop as: +// var = alloca double +// ... +// start = startexpr +// store start -> var +// goto loop +// loop: +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// endcond = endexpr +// +// curvar = load var +// nextvar = curvar + step +// store nextvar -> var +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + AllocaInst *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (!Body->codegen()) + return nullptr; + + // Emit the step value. + Value *StepVal = nullptr; + if (Step) { + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(TheContext, APFloat(1.0)); + } + + // Compute the end condition. + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. + Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + + // Convert condition to a bool by comparing non-equal to 0.0. + EndCond = Builder.CreateFCmpONE( + EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *AfterBB = + BasicBlock::Create(TheContext, "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(TheContext)); +} + +Value *VarExprAST::codegen() { + std::vector<AllocaInst *> OldBindings; + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Register all variables and emit their initializer. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { + const std::string &VarName = VarNames[i].first; + ExprAST *Init = VarNames[i].second.get(); + + // Emit the initializer before adding the variable to scope, this prevents + // the initializer from referencing the variable itself, and permits stuff + // like this: + // var a = 1 in + // var a = a in ... # refers to outer 'a'. + Value *InitVal; + if (Init) { + InitVal = Init->codegen(); + if (!InitVal) + return nullptr; + } else { // If not specified, use 0.0. + InitVal = ConstantFP::get(TheContext, APFloat(0.0)); + } + + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + Builder.CreateStore(InitVal, Alloca); + + // Remember the old variable binding so that we can restore the binding when + // we unrecurse. + OldBindings.push_back(NamedValues[VarName]); + + // Remember this binding. + NamedValues[VarName] = Alloca; + } + + // Codegen the body, now that all vars are in scope. + Value *BodyVal = Body->codegen(); + if (!BodyVal) + return nullptr; + + // Pop all our variables from scope. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) + NamedValues[VarNames[i].first] = OldBindings[i]; + + // Return the body computation. + return BodyVal; +} + +Function *PrototypeAST::codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // If this is an operator, install it. + if (P.isBinaryOp()) + BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); + + // Store the initial value into the alloca. + Builder.CreateStore(&Arg, Alloca); + + // Add arguments to variable symbol table. + NamedValues[std::string(Arg.getName())] = Alloca; + } + + if (Value *RetVal = Body->codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (P.isBinaryOp()) + BinopPrecedence.erase(P.getOperatorName()); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = std::make_unique<Module>("my cool jit", TheContext); +} + +static void HandleDefinition() { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { + fprintf(stderr, "Read function definition:"); + FnIR->print(errs()); + fprintf(stderr, "\n"); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { + fprintf(stderr, "Read extern: "); + FnIR->print(errs()); + fprintf(stderr, "\n"); + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (auto FnAST = ParseTopLevelExpr()) { + FnAST->codegen(); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (true) { + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + +/// putchard - putchar that takes a double and returns 0. +extern "C" DLLEXPORT double putchard(double X) { + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" DLLEXPORT double printd(double X) { + fprintf(stderr, "%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + InitializeModuleAndPassManager(); + + // Run the main "interpreter loop" now. + MainLoop(); + + // Initialize the target registry etc. + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllAsmPrinters(); + + auto TargetTriple = sys::getDefaultTargetTriple(); + TheModule->setTargetTriple(TargetTriple); + + std::string Error; + auto Target = TargetRegistry::lookupTarget(TargetTriple, Error); + + // Print an error and exit if we couldn't find the requested target. + // This generally occurs if we've forgotten to initialise the + // TargetRegistry or we have a bogus target triple. + if (!Target) { + errs() << Error; + return 1; + } + + auto CPU = "generic"; + auto Features = ""; + + TargetOptions opt; + auto RM = Optional<Reloc::Model>(); + auto TheTargetMachine = + Target->createTargetMachine(TargetTriple, CPU, Features, opt, RM); + + TheModule->setDataLayout(TheTargetMachine->createDataLayout()); + +// THIS LINE CHANGED FOR THE h_da FbI VERSION! + auto Filename = "test8.o"; + std::error_code EC; + raw_fd_ostream dest(Filename, EC, sys::fs::OF_None); + + if (EC) { + errs() << "Could not open file: " << EC.message(); + return 1; + } + + legacy::PassManager pass; + auto FileType = CGFT_ObjectFile; + + if (TheTargetMachine->addPassesToEmitFile(pass, dest, nullptr, FileType)) { + errs() << "TheTargetMachine can't emit a file of this type"; + return 1; + } + + pass.run(*TheModule); + dest.flush(); + + outs() << "Wrote " << Filename << "\n"; + + return 0; +} diff --git a/LLVM/cplusplus/toy9.cpp b/LLVM/cplusplus/toy9.cpp new file mode 100644 index 0000000..ea134cd --- /dev/null +++ b/LLVM/cplusplus/toy9.cpp @@ -0,0 +1,1452 @@ +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Transforms/Scalar.h" +#include <cctype> +#include <cstdio> +#include <map> +#include <string> +#include <vector> +// Next Line modified for the FbI/h_da.de version (only) +#include "KaleidoscopeJIT.h" + +using namespace llvm; +using namespace llvm::orc; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5, + + // control + tok_if = -6, + tok_then = -7, + tok_else = -8, + tok_for = -9, + tok_in = -10, + + // operators + tok_binary = -11, + tok_unary = -12, + + // var definition + tok_var = -13 +}; + +std::string getTokName(int Tok) { + switch (Tok) { + case tok_eof: + return "eof"; + case tok_def: + return "def"; + case tok_extern: + return "extern"; + case tok_identifier: + return "identifier"; + case tok_number: + return "number"; + case tok_if: + return "if"; + case tok_then: + return "then"; + case tok_else: + return "else"; + case tok_for: + return "for"; + case tok_in: + return "in"; + case tok_binary: + return "binary"; + case tok_unary: + return "unary"; + case tok_var: + return "var"; + } + return std::string(1, (char)Tok); +} + +namespace { +class PrototypeAST; +class ExprAST; +} +static LLVMContext TheContext; +static IRBuilder<> Builder(TheContext); +struct DebugInfo { + DICompileUnit *TheCU; + DIType *DblTy; + std::vector<DIScope *> LexicalBlocks; + + void emitLocation(ExprAST *AST); + DIType *getDoubleTy(); +} KSDbgInfo; + +struct SourceLocation { + int Line; + int Col; +}; +static SourceLocation CurLoc; +static SourceLocation LexLoc = {1, 0}; + +static int advance() { + int LastChar = getchar(); + + if (LastChar == '\n' || LastChar == '\r') { + LexLoc.Line++; + LexLoc.Col = 0; + } else + LexLoc.Col++; + return LastChar; +} + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = advance(); + + CurLoc = LexLoc; + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = advance()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + if (IdentifierStr == "if") + return tok_if; + if (IdentifierStr == "then") + return tok_then; + if (IdentifierStr == "else") + return tok_else; + if (IdentifierStr == "for") + return tok_for; + if (IdentifierStr == "in") + return tok_in; + if (IdentifierStr == "binary") + return tok_binary; + if (IdentifierStr == "unary") + return tok_unary; + if (IdentifierStr == "var") + return tok_var; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = advance(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = advance(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = advance(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// +namespace { + +raw_ostream &indent(raw_ostream &O, int size) { + return O << std::string(size, ' '); +} + +/// ExprAST - Base class for all expression nodes. +class ExprAST { + SourceLocation Loc; + +public: + ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {} + virtual ~ExprAST() {} + virtual Value *codegen() = 0; + int getLine() const { return Loc.Line; } + int getCol() const { return Loc.Col; } + virtual raw_ostream &dump(raw_ostream &out, int ind) { + return out << ':' << getLine() << ':' << getCol() << '\n'; + } +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} + raw_ostream &dump(raw_ostream &out, int ind) override { + return ExprAST::dump(out << Val, ind); + } + Value *codegen() override; +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(SourceLocation Loc, const std::string &Name) + : ExprAST(Loc), Name(Name) {} + const std::string &getName() const { return Name; } + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { + return ExprAST::dump(out << Name, ind); + } +}; + +/// UnaryExprAST - Expression class for a unary operator. +class UnaryExprAST : public ExprAST { + char Opcode; + std::unique_ptr<ExprAST> Operand; + +public: + UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) + : Opcode(Opcode), Operand(std::move(Operand)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { + ExprAST::dump(out << "unary" << Opcode, ind); + Operand->dump(out, ind + 1); + return out; + } +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr<ExprAST> LHS, RHS; + +public: + BinaryExprAST(SourceLocation Loc, char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : ExprAST(Loc), Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { + ExprAST::dump(out << "binary" << Op, ind); + LHS->dump(indent(out, ind) << "LHS:", ind + 1); + RHS->dump(indent(out, ind) << "RHS:", ind + 1); + return out; + } +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<std::unique_ptr<ExprAST>> Args; + +public: + CallExprAST(SourceLocation Loc, const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : ExprAST(Loc), Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { + ExprAST::dump(out << "call " << Callee, ind); + for (const auto &Arg : Args) + Arg->dump(indent(out, ind + 1), ind + 1); + return out; + } +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + std::unique_ptr<ExprAST> Cond, Then, Else; + +public: + IfExprAST(SourceLocation Loc, std::unique_ptr<ExprAST> Cond, + std::unique_ptr<ExprAST> Then, std::unique_ptr<ExprAST> Else) + : ExprAST(Loc), Cond(std::move(Cond)), Then(std::move(Then)), + Else(std::move(Else)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { + ExprAST::dump(out << "if", ind); + Cond->dump(indent(out, ind) << "Cond:", ind + 1); + Then->dump(indent(out, ind) << "Then:", ind + 1); + Else->dump(indent(out, ind) << "Else:", ind + 1); + return out; + } +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + std::unique_ptr<ExprAST> Start, End, Step, Body; + +public: + ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, + std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, + std::unique_ptr<ExprAST> Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { + ExprAST::dump(out << "for", ind); + Start->dump(indent(out, ind) << "Cond:", ind + 1); + End->dump(indent(out, ind) << "End:", ind + 1); + Step->dump(indent(out, ind) << "Step:", ind + 1); + Body->dump(indent(out, ind) << "Body:", ind + 1); + return out; + } +}; + +/// VarExprAST - Expression class for var/in +class VarExprAST : public ExprAST { + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; + std::unique_ptr<ExprAST> Body; + +public: + VarExprAST( + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, + std::unique_ptr<ExprAST> Body) + : VarNames(std::move(VarNames)), Body(std::move(Body)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { + ExprAST::dump(out << "var", ind); + for (const auto &NamedVar : VarNames) + NamedVar.second->dump(indent(out, ind) << NamedVar.first << ':', ind + 1); + Body->dump(indent(out, ind) << "Body:", ind + 1); + return out; + } +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool IsOperator; + unsigned Precedence; // Precedence if a binary op. + int Line; + +public: + PrototypeAST(SourceLocation Loc, const std::string &Name, + std::vector<std::string> Args, bool IsOperator = false, + unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec), Line(Loc.Line) {} + Function *codegen(); + const std::string &getName() const { return Name; } + + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size() - 1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } + int getLine() const { return Line; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; + +public: + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); + raw_ostream &dump(raw_ostream &out, int ind) { + indent(out, ind) << "FunctionAST\n"; + ++ind; + indent(out, ind) << "Body:"; + return Body ? Body->dump(out, ind) : out << "null\n"; + } +}; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// LogError* - These are little helper functions for error handling. +std::unique_ptr<ExprAST> LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} + +std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = std::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + SourceLocation LitLoc = CurLoc; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique<VariableExprAST>(LitLoc, IdName); + + // Call. + getNextToken(); // eat ( + std::vector<std::unique_ptr<ExprAST>> Args; + if (CurTok != ')') { + while (1) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique<CallExprAST>(LitLoc, IdName, std::move(Args)); +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static std::unique_ptr<ExprAST> ParseIfExpr() { + SourceLocation IfLoc = CurLoc; + + getNextToken(); // eat the if. + + // condition. + auto Cond = ParseExpression(); + if (!Cond) + return nullptr; + + if (CurTok != tok_then) + return LogError("expected then"); + getNextToken(); // eat the then + + auto Then = ParseExpression(); + if (!Then) + return nullptr; + + if (CurTok != tok_else) + return LogError("expected else"); + + getNextToken(); + + auto Else = ParseExpression(); + if (!Else) + return nullptr; + + return std::make_unique<IfExprAST>(IfLoc, std::move(Cond), std::move(Then), + std::move(Else)); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static std::unique_ptr<ExprAST> ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return LogError("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return LogError("expected '=' after for"); + getNextToken(); // eat '='. + + auto Start = ParseExpression(); + if (!Start) + return nullptr; + if (CurTok != ',') + return LogError("expected ',' after for start value"); + getNextToken(); + + auto End = ParseExpression(); + if (!End) + return nullptr; + + // The step value is optional. + std::unique_ptr<ExprAST> Step; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (!Step) + return nullptr; + } + + if (CurTok != tok_in) + return LogError("expected 'in' after for"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); +} + +/// varexpr ::= 'var' identifier ('=' expression)? +// (',' identifier ('=' expression)?)* 'in' expression +static std::unique_ptr<ExprAST> ParseVarExpr() { + getNextToken(); // eat the var. + + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; + + // At least one variable name is required. + if (CurTok != tok_identifier) + return LogError("expected identifier after var"); + + while (1) { + std::string Name = IdentifierStr; + getNextToken(); // eat identifier. + + // Read the optional initializer. + std::unique_ptr<ExprAST> Init = nullptr; + if (CurTok == '=') { + getNextToken(); // eat the '='. + + Init = ParseExpression(); + if (!Init) + return nullptr; + } + + VarNames.push_back(std::make_pair(Name, std::move(Init))); + + // End of var list, exit loop. + if (CurTok != ',') + break; + getNextToken(); // eat the ','. + + if (CurTok != tok_identifier) + return LogError("expected identifier list after var"); + } + + // At this point, we have to have 'in'. + if (CurTok != tok_in) + return LogError("expected 'in' keyword after 'var'"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +/// ::= varexpr +static std::unique_ptr<ExprAST> ParsePrimary() { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + case tok_if: + return ParseIfExpr(); + case tok_for: + return ParseForExpr(); + case tok_var: + return ParseVarExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static std::unique_ptr<ExprAST> ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (auto Operand = ParseUnary()) + return std::make_unique<UnaryExprAST>(Opc, std::move(Operand)); + return nullptr; +} + +/// binoprhs +/// ::= ('+' unary)* +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + SourceLocation BinLoc = CurLoc; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + auto RHS = ParseUnary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = std::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS), + std::move(RHS)); + } +} + +/// expression +/// ::= unary binoprhs +/// +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParseUnary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static std::unique_ptr<PrototypeAST> ParsePrototype() { + std::string FnName; + + SourceLocation FnLoc = CurLoc; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return LogErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return LogErrorP("Invalid precedence: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return LogErrorP("Invalid number of operands for operator"); + + return std::make_unique<PrototypeAST>(FnLoc, FnName, ArgNames, Kind != 0, + BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr<FunctionAST> ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { + SourceLocation FnLoc = CurLoc; + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique<PrototypeAST>(FnLoc, "__anon_expr", + std::vector<std::string>()); + return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr<PrototypeAST> ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Debug Info Support +//===----------------------------------------------------------------------===// + +static std::unique_ptr<DIBuilder> DBuilder; + +DIType *DebugInfo::getDoubleTy() { + if (DblTy) + return DblTy; + + DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float); + return DblTy; +} + +void DebugInfo::emitLocation(ExprAST *AST) { + if (!AST) + return Builder.SetCurrentDebugLocation(DebugLoc()); + DIScope *Scope; + if (LexicalBlocks.empty()) + Scope = TheCU; + else + Scope = LexicalBlocks.back(); + Builder.SetCurrentDebugLocation( + DebugLoc::get(AST->getLine(), AST->getCol(), Scope)); +} + +static DISubroutineType *CreateFunctionType(unsigned NumArgs, DIFile *Unit) { + SmallVector<Metadata *, 8> EltTys; + DIType *DblTy = KSDbgInfo.getDoubleTy(); + + // Add the result type. + EltTys.push_back(DblTy); + + for (unsigned i = 0, e = NumArgs; i != e; ++i) + EltTys.push_back(DblTy); + + return DBuilder->createSubroutineType(DBuilder->getOrCreateTypeArray(EltTys)); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static std::unique_ptr<Module> TheModule; +static std::map<std::string, AllocaInst *> NamedValues; +static std::unique_ptr<KaleidoscopeJIT> TheJIT; +static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; + +Value *LogErrorV(const char *Str) { + LogError(Str); + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of +/// the function. This is used for mutable variables etc. +static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, + StringRef VarName) { + IRBuilder<> TmpB(&TheFunction->getEntryBlock(), + TheFunction->getEntryBlock().begin()); + return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName); +} + +Value *NumberExprAST::codegen() { + KSDbgInfo.emitLocation(this); + return ConstantFP::get(TheContext, APFloat(Val)); +} + +Value *VariableExprAST::codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (!V) + return LogErrorV("Unknown variable name"); + + KSDbgInfo.emitLocation(this); + // Load the value. + return Builder.CreateLoad(V, Name.c_str()); +} + +Value *UnaryExprAST::codegen() { + Value *OperandV = Operand->codegen(); + if (!OperandV) + return nullptr; + + Function *F = getFunction(std::string("unary") + Opcode); + if (!F) + return LogErrorV("Unknown unary operator"); + + KSDbgInfo.emitLocation(this); + return Builder.CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::codegen() { + KSDbgInfo.emitLocation(this); + + // Special case '=' because we don't want to emit the LHS as an expression. + if (Op == '=') { + // Assignment requires the LHS to be an identifier. + // This assume we're building without RTTI because LLVM builds that way by + // default. If you build LLVM with RTTI this can be changed to a + // dynamic_cast for automatic error checking. + VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); + if (!LHSE) + return LogErrorV("destination of '=' must be a variable"); + // Codegen the RHS. + Value *Val = RHS->codegen(); + if (!Val) + return nullptr; + + // Look up the name. + Value *Variable = NamedValues[LHSE->getName()]; + if (!Variable) + return LogErrorV("Unknown variable name"); + + Builder.CreateStore(Val, Variable); + return Val; + } + + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + + switch (Op) { + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext), "booltmp"); + default: + break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = getFunction(std::string("binary") + Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = {L, R}; + return Builder.CreateCall(F, Ops, "binop"); +} + +Value *CallExprAST::codegen() { + KSDbgInfo.emitLocation(this); + + // Look up the name in the global module table. + Function *CalleeF = getFunction(Callee); + if (!CalleeF) + return LogErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return LogErrorV("Incorrect # arguments passed"); + + std::vector<Value *> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::codegen() { + KSDbgInfo.emitLocation(this); + + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + CondV = Builder.CreateFCmpONE( + CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(TheContext, "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else"); + BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +// Output for-loop as: +// var = alloca double +// ... +// start = startexpr +// store start -> var +// goto loop +// loop: +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// endcond = endexpr +// +// curvar = load var +// nextvar = curvar + step +// store nextvar -> var +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + KSDbgInfo.emitLocation(this); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *LoopBB = BasicBlock::Create(TheContext, "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + AllocaInst *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (!Body->codegen()) + return nullptr; + + // Emit the step value. + Value *StepVal = nullptr; + if (Step) { + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(TheContext, APFloat(1.0)); + } + + // Compute the end condition. + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. + Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + + // Convert condition to a bool by comparing non-equal to 0.0. + EndCond = Builder.CreateFCmpONE( + EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *AfterBB = + BasicBlock::Create(TheContext, "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(TheContext)); +} + +Value *VarExprAST::codegen() { + std::vector<AllocaInst *> OldBindings; + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Register all variables and emit their initializer. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { + const std::string &VarName = VarNames[i].first; + ExprAST *Init = VarNames[i].second.get(); + + // Emit the initializer before adding the variable to scope, this prevents + // the initializer from referencing the variable itself, and permits stuff + // like this: + // var a = 1 in + // var a = a in ... # refers to outer 'a'. + Value *InitVal; + if (Init) { + InitVal = Init->codegen(); + if (!InitVal) + return nullptr; + } else { // If not specified, use 0.0. + InitVal = ConstantFP::get(TheContext, APFloat(0.0)); + } + + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + Builder.CreateStore(InitVal, Alloca); + + // Remember the old variable binding so that we can restore the binding when + // we unrecurse. + OldBindings.push_back(NamedValues[VarName]); + + // Remember this binding. + NamedValues[VarName] = Alloca; + } + + KSDbgInfo.emitLocation(this); + + // Codegen the body, now that all vars are in scope. + Value *BodyVal = Body->codegen(); + if (!BodyVal) + return nullptr; + + // Pop all our variables from scope. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) + NamedValues[VarNames[i].first] = OldBindings[i]; + + // Return the body computation. + return BodyVal; +} + +Function *PrototypeAST::codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(TheContext)); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // If this is an operator, install it. + if (P.isBinaryOp()) + BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Create a subprogram DIE for this function. + DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(), + KSDbgInfo.TheCU->getDirectory()); + DIScope *FContext = Unit; + unsigned LineNo = P.getLine(); + unsigned ScopeLine = LineNo; + DISubprogram *SP = DBuilder->createFunction( + FContext, P.getName(), StringRef(), Unit, LineNo, + CreateFunctionType(TheFunction->arg_size(), Unit), ScopeLine, + DINode::FlagPrototyped, DISubprogram::SPFlagDefinition); + TheFunction->setSubprogram(SP); + + // Push the current scope. + KSDbgInfo.LexicalBlocks.push_back(SP); + + // Unset the location for the prologue emission (leading instructions with no + // location in a function are considered part of the prologue and the debugger + // will run past them when breaking on a function) + KSDbgInfo.emitLocation(nullptr); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + unsigned ArgIdx = 0; + for (auto &Arg : TheFunction->args()) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); + + // Create a debug descriptor for the variable. + DILocalVariable *D = DBuilder->createParameterVariable( + SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(), + true); + + DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(), + DebugLoc::get(LineNo, 0, SP), + Builder.GetInsertBlock()); + + // Store the initial value into the alloca. + Builder.CreateStore(&Arg, Alloca); + + // Add arguments to variable symbol table. + NamedValues[std::string(Arg.getName())] = Alloca; + } + + KSDbgInfo.emitLocation(Body.get()); + + if (Value *RetVal = Body->codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Pop off the lexical block for the function. + KSDbgInfo.LexicalBlocks.pop_back(); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (P.isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + + // Pop off the lexical block for the function since we added it + // unconditionally. + KSDbgInfo.LexicalBlocks.pop_back(); + + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void InitializeModule() { + // Open a new module. + TheModule = std::make_unique<Module>("my cool jit", TheContext); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); +} + +static void HandleDefinition() { + if (auto FnAST = ParseDefinition()) { + if (!FnAST->codegen()) + fprintf(stderr, "Error reading function definition:"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (auto ProtoAST = ParseExtern()) { + if (!ProtoAST->codegen()) + fprintf(stderr, "Error reading extern"); + else + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (auto FnAST = ParseTopLevelExpr()) { + if (!FnAST->codegen()) { + fprintf(stderr, "Error generating code for top level expr"); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + +/// putchard - putchar that takes a double and returns 0. +extern "C" DLLEXPORT double putchard(double X) { + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" DLLEXPORT double printd(double X) { + fprintf(stderr, "%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['='] = 2; + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + getNextToken(); + + TheJIT = std::make_unique<KaleidoscopeJIT>(); + + InitializeModule(); + + // Add the current debug info version into the module. + TheModule->addModuleFlag(Module::Warning, "Debug Info Version", + DEBUG_METADATA_VERSION); + + // Darwin only supports dwarf2. + if (Triple(sys::getProcessTriple()).isOSDarwin()) + TheModule->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 2); + + // Construct the DIBuilder, we do this here because we need the module. + DBuilder = std::make_unique<DIBuilder>(*TheModule); + + // Create the compile unit for the module. + // Currently down as "fib.ks" as a filename since we're redirecting stdin + // but we'd like actual source locations. + KSDbgInfo.TheCU = DBuilder->createCompileUnit( + dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."), + "Kaleidoscope Compiler", 0, "", 0); + + // Run the main "interpreter loop" now. + MainLoop(); + + // Finalize the debug info. + DBuilder->finalize(); + + // Print out all of the generated code. + TheModule->print(errs(), nullptr); + + return 0; +} -- GitLab