diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/.gitignore b/recursiveDescentParsers/cplusplus/halfBakedCompiler/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..05f374a60068bb13638ac57ca2b480737e9bd0e9
--- /dev/null
+++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/.gitignore
@@ -0,0 +1,4 @@
+halfbaker
+*.o
+.deps/
+
diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/Makefile b/recursiveDescentParsers/cplusplus/halfBakedCompiler/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b27b7f4df7de99b6a1ddc8f801aeb5683955107f
--- /dev/null
+++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/Makefile
@@ -0,0 +1,84 @@
+# This Makefile made available to his students by
+# Prof. Ronald Moore
+# https://fbi.h-da.de/personen/ronald-moore/
+# mailto:ronald.moore@h-da.de
+# with no warranties whatsoever
+
+
+PROGS := halfbaker
+SOURCES := main.cpp lexer.cpp parser.cpp astree.cpp
+OBJS := $(SOURCES:.cpp=.o)
+
+# Uncomment only one of the next two lines (choose your c++ compiler)
+# CC=g++
+CC := clang++
+
+## Add your own CFLAGS if you find them necessary... such as -O3 or so...
+## -g for debugging
+## -std=<whatever> to select the right C++ Version
+## -fmessage-length=0 disallows line wrapping in error messages
+## (helps some IDEs (still?))
+# CPPFLAGS := -g -std=c++17 -Wall -fmessage-length=0
+CPPFLAGS := -g -std=c++17 -Wall
+
+## More preliminaries
+# See https://www.gnu.org/software/make/manual/html_node/Special-Targets.html
+# In this makefile, we want to keep going even if we find errors
+.IGNORE :
+
+# Tell make that the following "targets" are "phony"
+# Cf. https://www.gnu.org/software/make/manual/html_node/Phony-Targets.html#Phony-Targets
+.PHONY : all clean tests
+
+# This absolutely needs to be the first target (so to be the default target)
+all: $(PROGS)
+
+# Some of the "Automatic Variables" that can be used in Makefiles.
+# Cf. https://www.gnu.org/software/make/manual/ - particularly
+# https://www.gnu.org/software/make/manual/html_node/Automatic-Variables.html#Automatic-Variables
+# $@ = The filename representing the target.
+# $< = The filename of the first prerequisite.
+# $(*F) = The stem of the filename of the target (i.e. without .o, .cpp...)
+# $^ = The names of all the prerequisites, with spaces between them.
+
+## Following magic is used to figure out which dot cpp files depend
+# on which headers (dot h files) -- automatically (so that we recompile
+# only the necessary dot cpp files when a header is modified).
+# Magic taken from
+# http://make.mad-scientist.net/papers/advanced-auto-dependency-generation/
+# ... and then fixed, and fixed, and fixed some more.
+DEPDIR := .deps
+# From URL: DEPFLAGS = -MT $@ -MMD -MP -MF $(DEPDIR)/$*.d
+# Simpler, better... (must stay recursive (=): $* is only known inside a rule)
+DEPFLAGS = -MMD -MP -MF $(DEPDIR)/$*.d
+
+DEPS := $(OBJS:%.o=$(DEPDIR)/%.d)
+
+# Include the deps (the leading minus says: don't complain if they don't exist yet)
+-include $(DEPS)
+
+# $(DEPDIR) is order-only (after the |): it must exist, but its timestamp never forces a recompile
+%.o : %.cpp | $(DEPDIR)
+	$(CC) -c $(CPPFLAGS) $(DEPFLAGS) -o $@ $<
+
+# Make depdir if it doesn't exist...
+$(DEPDIR): ; @mkdir -p $@
+
+# generate dep files. Note missing "-o" to keep from generating them too
+# $(DEPDIR)/%.d : $(DEPDIR)
+# $(CC) -c $(CPPFLAGS) $(DEPFLAGS) $*.cpp
+
+## Now, the REAL targets -- the things that will get made!
+
+$(PROGS): $(OBJS)
+	$(CC) $(CPPFLAGS) $(OBJS) $(LIBS) -o $@
+
+clean:
+	$(RM) -v *~ *.o $(PROGS) tmp.txt
+	$(RM) -fvr $(DEPDIR)
+	# starting recursive clean...
+	cd tests && $(MAKE) clean
+
+tests: $(PROGS)
+	# Going to the tests directory for testing
+	cd tests && $(MAKE) tests
diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/README.md b/recursiveDescentParsers/cplusplus/halfBakedCompiler/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1f1c217fd727cad8a8f1da68bce0979ef52dd508
--- /dev/null
+++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/README.md
@@ -0,0 +1,68 @@
+Overview
+========
+
+Everything here is taken from the slides for the "Compiler Construction"
+course, i.e. it is directly from Prof. Ronald C. Moore.
+
+Or at least, after all this time, I really don't remember having stolen
+this code from somewhere else, but if you find an older copy that looks
+similar, please let me know, so I can give credit where credit is due --
+or disavow knowledge of that source (as the case may be).
+
+You are in the following subdirectory
+ ** `halfBakedCompiler`
+ This code takes mathematical expressions and builds a syntax tree for each,
+ i.e. it outputs trees, not numbers. As far as recursive descent parsing goes
+ it follows its sister folder `interpreter`, but it uses more C++ features and
+ is set up so as to support growing up to be a larger project.
+
+See **Chapter 1 Front End Construction**, Slides 21 and 22 (and please let me know when the inevitable day comes that these slide numbers are no longer correct).
+
+Building and Running
+====================
+
+This version requires a C++17 compatible C++ compiler,
+such as newer versions of `g++` or `clang`.
+
+Build the program by running `make`.
+
+In case of doubt, use the voodoo command `make clean` and then repeat `make`.
+
+To test the program, run `make tests`. This also illustrates how the program is used.
+
+Alternatively, just run the program with *no* parameters (i.e. simply `./halfbaker`).
+It will tell you how it wants to be run (Hint: there are two usages).
+ +Contents (Manifest) +==================== + +You should find here: + +* `lexer.cpp` and `lexer.h` + Source code for the lexer. + +* `parser.cpp` and `parser.h` + Source code for the recursive descent parser. + +* `main.cpp` + Source code for `main` -- the driver. + +* `Makefile` + Used to run `make` (obviously?). + +* `tests` + A directory full of test cases. Run `make tests` + (either in directory `tests` or the parent directory) + to run the tests. Not unit tests, rather acceptance tests, + but regression tests all the same. + +* `README.md` + This file. + +Ronald Moore +https://fbi.h-da.de/personen/ronald-moore/ +ronald.moore@h-da.de + + +1 May 2020 + diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/astree.cpp b/recursiveDescentParsers/cplusplus/halfBakedCompiler/astree.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c86dd83cd4974f646315b07ec92dc63bc4a7568f --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/astree.cpp @@ -0,0 +1,90 @@ +// This code made available to his students by +// Prof. Ronald Moore +// https://fbi.h-da.de/personen/ronald-moore/ +// mailto:ronald.moore@h-da.de +// with no warranties whatsoever! + +#include "astree.h" // chain includes lexer.h + +#include <cassert> +#include <iostream> // for std::cout etc. +#include <sstream> +#include <typeinfo> + + +const std::string BaseASTnode::nodeName( ) const { + return typeid( this ).name(); +}; // end nodeName() + +void BaseASTnode::printNodeText( int depth, std::string label ) { + // sanity check + assert( 0 <= depth ); // negative depth not allowed( + assert( depth < 2048 ); // would be very strange (arbitrary limit) + assert( ! label.empty() ); + + // Print preface + std::cout << std::flush; + // Implicit: Do this only if 2 <= depth + for ( int i = 0; i <= depth-2; i++ ) + std::cerr << "| "; // pipe plus three spaces + + // Last stage of preface + if (0 < depth ) std::cerr << "+--> "; // no need to flush cerr! 
+ + // Print label and endline + std::cerr << label << '\n'; // \n is correct for cerr! + +}; // end printNodeText + +void FASTnode::print( int depth ) const { + std::stringstream ss; + ss << nodeName( ); + switch ( data.index( ) ) { + case 0 : // number + ss << " = number " + << std::get< 0 >( data ); + printNodeText( depth, ss.str() ); + break; + + case 1 : // ptr + ss << " = = parenthetical expression "; + printNodeText( depth, ss.str() ); + printChild( depth+1, + std::get< 1 >( data ).get() ); + break; + + default : + std::cerr << "Bad data! in FASTnode::print()\n"; + exit( -3 ); + + }; // end case +}; // end FASTnode::print() + +void T2ndASTnode::print( int depth ) const { + std::string output( nodeName( ) ); + output += ( multiplication ? " multiplier" : " divider" ); + printNodeText( depth, output ); + printChild( depth+1, Fptr.get() ); + printChild( depth+1, T2ptr.get() ); +}; // end T2ndASTnode::print() + +void TASTnode::print( int depth ) const { + printNodeText( depth, nodeName( ) ); + printChild( depth+1, Fptr.get() ); + printChild( depth+1, T2ptr.get() ); +}; // end TASTnode::print() + +void E2ndASTnode::print( int depth ) const { + std::string output( nodeName( ) ); + output += ( addition ? " adder" : " subtracter" ); + printNodeText( depth, output ); + printChild( depth+1, Tptr.get() ); + printChild( depth+1, E2ptr.get() ); +}; // end E2nASTnode::print() + +void EASTnode::print( int depth ) const { + printNodeText( depth, nodeName( ) ); + printChild( depth+1, Tptr.get() ); + printChild( depth+1, E2ptr.get() ); +}; // end TASTnode::print() + diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/astree.h b/recursiveDescentParsers/cplusplus/halfBakedCompiler/astree.h new file mode 100644 index 0000000000000000000000000000000000000000..e5151f2f2f28c062399b1fd334c74ac4b6851477 --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/astree.h @@ -0,0 +1,156 @@ +// This code made available to his students by +// Prof. 
Ronald Moore +// https://fbi.h-da.de/personen/ronald-moore/ +// mailto:ronald.moore@h-da.de +// with no warranties whatsoever! + +#pragma once + +#include <memory> // for std::unique_ptr +#include <variant> // for std::variant + +#include "lexer.h" + +/// Base class for all expression nodes. +/// Abstract Class ("Interface") since there are pure virtual methods +class BaseASTnode { +protected: // can be used by children, not externally + virtual const std::string nodeName( ) const = 0; + + static void printNodeText( int depth, std::string label ); + + static void printChild( int depth, const BaseASTnode *kid ) { + if ( kid ) + kid->print( depth ); + else // if nullptr + printNodeText( depth, " nullptr child " ); + }; // end printChild + +public: + virtual ~BaseASTnode() {} + + // virtual lex::numberType eval( lex::numberType ) const = 0; // requirement + + virtual void print( int depth ) const = 0; + + +}; // end class BaseASTnode + +// REMINDER +// Grammar: +// E → T E´ +// E´ → + T E´ | - T E´ | ε +// T → F T´ +// T´ → * F T´ | / F T´ | ε +// F → ( E ) | num + +// Forward declaration +class EASTnode ; // see below for real declaration + +// F → ( E ) | num +class FASTnode : public BaseASTnode { +private: + std::variant< lex::numberType, std::unique_ptr<EASTnode> > data; + +protected: + virtual const std::string nodeName( ) const { return "F"; }; + +public: + // Two constructors + FASTnode( std::unique_ptr<EASTnode> Eptr ) + : data( std::move( Eptr )) {}; + + FASTnode( lex::numberType num ) + : data( num ) {}; + + virtual void print( int depth ) const; + +}; + +// T´ → * F T´ | / F T´ | ε +class T2ndASTnode : public BaseASTnode { +private: + bool multiplication; // i.e. 
not division + std::unique_ptr< FASTnode > Fptr; + std::unique_ptr< T2ndASTnode > T2ptr; + +protected: + virtual const std::string nodeName( ) const { return "T'"; }; + +public: + // Constructor + T2ndASTnode( bool multiplier, // not divider + std::unique_ptr< FASTnode > f, + std::unique_ptr< T2ndASTnode > t2nd ) + : multiplication( multiplier ), + Fptr( std::move( f ) ), + T2ptr(std::move( t2nd )) {}; + + virtual void print( int depth ) const; + +}; + +// T → F T´ +class TASTnode : public BaseASTnode { +private: + std::unique_ptr<FASTnode> Fptr; + std::unique_ptr<T2ndASTnode> T2ptr; + +protected: + virtual const std::string nodeName( ) const { return "T"; }; + +public: + // Constructor + TASTnode( std::unique_ptr<FASTnode> f, + std::unique_ptr<T2ndASTnode> t2nd ) + : Fptr(std::move( f )), + T2ptr(std::move( t2nd )) {}; + + virtual void print( int depth ) const; + +}; + +// E´ → + T E´ | - T E´ | ε +class E2ndASTnode : public BaseASTnode { +private: + bool addition; // i.e. not subtraction + std::unique_ptr< TASTnode > Tptr; + std::unique_ptr< E2ndASTnode > E2ptr; + +protected: + virtual const std::string nodeName( ) const { return "E'"; }; + +public: + // Constructor + E2ndASTnode( bool adder, // not subtracter + std::unique_ptr< TASTnode > t, + std::unique_ptr< E2ndASTnode > e2nd ) + : addition( adder ), + Tptr( std::move( t ) ), + E2ptr(std::move( e2nd )) {} + + virtual void print( int depth ) const; + +}; + +// E → T E´ +class EASTnode : public BaseASTnode { +private: + std::unique_ptr<TASTnode> Tptr; + std::unique_ptr<E2ndASTnode> E2ptr; + +protected: + virtual const std::string nodeName( ) const { return "E"; }; + +public: + EASTnode( std::unique_ptr<TASTnode> t, + std::unique_ptr<E2ndASTnode> e2nd ) + : Tptr(std::move( t )), + E2ptr(std::move( e2nd )) {} + + virtual void print( int depth ) const; + +}; + + + diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/lexer.cpp b/recursiveDescentParsers/cplusplus/halfBakedCompiler/lexer.cpp new file mode 
100644 index 0000000000000000000000000000000000000000..f09c5715285c145cd768084b4165b3738f34cf61 --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/lexer.cpp @@ -0,0 +1,141 @@ +// This code made available to his students by +// Prof. Ronald Moore +// https://fbi.h-da.de/personen/ronald-moore/ +// mailto:ronald.moore@h-da.de +// with no warranties whatsoever! + +#include "lexer.h" + +#include <cassert> +#include <cctype> // for isspace +#include <cstdlib> // for strtod +#include <iostream> +#include <fstream> + +namespace lex { // continue to define things in lex:: + +// instantiate here... +lex::Token next_token; // again with the global variables... + +// global variables -- sue me if you don't like that! +static std::string inputSourceName( "standard input" ); +static std::istream *input = &(std::cin); // until proven otherwise + +static std::string currentLine( "" ); +static int currentLineNumber = -1; +static int currentColumnNumber = 0; + +// Namespace "member functions" +// ============================ + +void printInputLocation( ) { + std::cout << inputSourceName + << " (" << currentLineNumber + << ',' << currentColumnNumber + << "):" << std::endl; + std::cout << currentLine << std::endl; + for ( int col = 0; col < currentColumnNumber; col++ ) + std::cout << '-'; + std::cout << '^' << std::endl; +} // end printInputLocation + +void printErrorMsg( const std::string Error ) +{ + printInputLocation( ); + std::cout << "ERROR : " << Error << std::endl; + advance_token( ); // Don't want to get stuck here. +} // end printErrorMsg + +// Utility skippedWhiteSpace... +static bool skippedWhiteSpace( ) { // return true if not at EOF, i.e. if skipped + while ( true ) { + int currentLineLength = currentLine.length(); + while ( currentColumnNumber < currentLineLength ) + if ( isspace( currentLine[ currentColumnNumber ] ) ) + currentColumnNumber++; + else // if NOT isspace() + return true; + + // if we're here, we're at the end of a line. 
+ std::getline( *input, currentLine ); + currentLineNumber++; + currentColumnNumber = 0; + if ( ! *input ) // EOF!! + return false; + // else, repeat! + // Which is the same as + // return skippedWhiteSpace() -- i.e. tail recursion. + }; +}; + +Token gettok( ) { + assert( input ); // we assume nullptr != input + + Token result( bad_tok, '\0' ); // default + + if ( ! *input ) return result; // i.e. bad_tok + // else, we can read from input + + // Skip white space, going to next line as necessary + if ( ! skippedWhiteSpace( ) ) { + result.first = lex::tok_eof; + return result; + }; + + // else -- not eof, d.h. we have visible text in front of us. + char currentChar = currentLine[ currentColumnNumber ]; + result.second = currentChar; // unless it's a number, etc. + currentColumnNumber++; // usually, but see num... + switch ( currentChar ) { + case '(' : result.first = tok_lparen; + break; + case ')' : result.first = tok_rparen; + break; + case '+' : result.first = tok_plus; + break; + case '-' : result.first = tok_minus; + break; + case '*' : result.first = tok_times; + break; + case '/' : result.first = tok_div; + break; + default : + // either we have a number in front of us, or we don't + assert( 0 < currentColumnNumber ); // remember, incremented! + char *alpha = &(currentLine[ currentColumnNumber-1 ]); + // minus one because we incremented it before the switch + char *omega = nullptr; // until we call strtod... + double tmpValue = strtod( alpha, &omega ); + // strtod sets omega to the first char after the number + if ( alpha == omega ) { + result.second = *omega; // or *alpha, they're the same... + return result; // i.e. bad_tok !!! 
+ }; + // else if strtod found a real number (or at least a double) + result.first = tok_number; + result.second = tmpValue; // let C++ do any converisons + currentColumnNumber += (omega - alpha) -1; + // minus one because we incremented it before the switch + + }; // end switch + return result; +} // end gettok + +void openInputSource( std::string filename ) { + assert( ! filename.empty() ); // caller should check that + inputSourceName = filename; + if ( "-" == filename ) + input = &(std::cin); + else { // if fileName is not "-" (a dash) + static std::ifstream ifs( filename, std::ifstream::in ); + if ( ! ifs.good( ) ) { + std::cerr << "ERROR opening file name " << filename + << " -- could not open.\n"; + exit( -2 ); + }; + // else if ifs is good + input = &ifs; + }; +} // end of openInputFile + +} // end namespace lex diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/lexer.h b/recursiveDescentParsers/cplusplus/halfBakedCompiler/lexer.h new file mode 100644 index 0000000000000000000000000000000000000000..6dd74587d84d5e93da16dfbdf2e3b103a2ca8e34 --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/lexer.h @@ -0,0 +1,69 @@ +// This code made available to his students by +// Prof. Ronald Moore +// https://fbi.h-da.de/personen/ronald-moore/ +// mailto:ronald.moore@h-da.de +// with no warranties whatsoever! + +#pragma once + +#include <cctype> // for isspace +#include <cstdlib> // for strtod +#include <string> +#include <cmath> // for NAN +#include <utility> // for std::pair +#include <variant> // new C++17 feature! Like unions, only better! + +// =================== +// LEXICAL ANALYSIS +// The following are taken to be tokens: +// left and right parenthesis, the plus and minus characters, +// as well as asterisk and forward slash -- and numbers. +// In the script, substraction and division are not supported, +// but it seems like time to add them. 
+ +// Preliminaries and Utilities +// ============================ + +namespace lex { + +// Utility Types +typedef double numberType; // feel fee to change this to something else like int or float or bigint.... + +// See https://en.cppreference.com/w/cpp/numeric/math/nan +static const numberType bad_number = std::nan( "1" ); + +// Tokens -- are a pair of a tag and a value, where the value can be +// various things - a char or a numberType at present, but names and +// multicharacter operators could be added later +typedef enum { + tok_number = 'n', + tok_lparen = '(', + tok_rparen = ')', + tok_plus = '+', + tok_minus = '-', + tok_times = '*', + tok_div = '/', + tok_eof = 'E', + bad_tok = 'X' +} TokenTag; + +typedef std::variant< char, numberType > TokenValue; + +typedef std::pair< TokenTag, TokenValue > Token; + +extern Token next_token; // again with the global variables... { + +// Functons (or methods, if you prefer) + +void printInputLocation( ); + +void printErrorMsg( const std::string Error ); + +Token gettok( ); // + +// DRY -- this line is repeated so often, it deserves its own function +inline void advance_token( ) { next_token = gettok(); } // eats current token! + +void openInputSource( std::string filename ); + +} // end namespace lex diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/main.cpp b/recursiveDescentParsers/cplusplus/halfBakedCompiler/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5156d4fa813b9b0eb863cda5b202884d7d95086d --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/main.cpp @@ -0,0 +1,52 @@ +// This code made available to his students by +// Prof. Ronald Moore +// https://fbi.h-da.de/personen/ronald-moore/ +// mailto:ronald.moore@h-da.de +// with no warranties whatsoever! + +#include <iostream> + +#include "parser.h" // this chain-includes lexer.h + +// main (!) 
+// ========= + +int main( int argc, char **argv ) { + + if (2 != argc) { + std::cerr << "Usage: " << argv[0] << " <fileName> \n" + << " " << argv[0] << "- (to read from standard input) \n" + << "You provided " << argc-1 << " arguments, we take exactly one (only).\n"; + return( -1 ); + }; + // else if 1 == argc .... + lex::openInputSource( argv[1] ); + + // Prime the pump! + lex::advance_token( ); + + // get tokens and dump them... + while ( lex::tok_eof != lex::next_token.first ) { + std::cout << "\n=======================\n" + << "Reading from "; + lex::printInputLocation( ); + auto tree = parse::E( ); + if ( ! tree ) + std::cout << "Compiler built empty tree!?!" << std::endl; + else { + std::cout << "Compiler built this tree:" << std::endl; + tree->print( 0 ); // zero starting depth + }; // end else if tree not nullptr + + if ( lex::bad_tok == lex::next_token.first ) { + std::string errorMsg( "Unrecognized character(s)=" ); + errorMsg.push_back( std::get< char >( lex::next_token.second ) ); + lex::printErrorMsg( errorMsg ); + }; // end if bad token + // since tree is a lost when leaving its scope, + // we don't have to delete it (or do we?) + }; // end while not eof + std::cout << "End Of File!" << std::endl; + + return 0; // Alles klar!!! +} diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/parser.cpp b/recursiveDescentParsers/cplusplus/halfBakedCompiler/parser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0585a6c57889fb187b9a1f4028ef2cbef5c66678 --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/parser.cpp @@ -0,0 +1,130 @@ +// This code made available to his students by +// Prof. Ronald Moore +// https://fbi.h-da.de/personen/ronald-moore/ +// mailto:ronald.moore@h-da.de +// with no warranties whatsoever! + + +#include <cassert> + +#include "parser.h" + +namespace parse { + + +// PARSING!!! 
+// =========== +// +// The grammar we are going to parse here is: +// Grammar: +// E → T E´ +// E´ → + T E´ | - T E´ | ε +// T → F T´ +// T´ → * F T´ | / F T´ | ε +// F → ( E ) | num +// Note that the recursive descent function for (e.g.) E´ +// is nameded "E2ndHalf"- + +// For every non-terminal (E, E2ndHalf, T, etc.) there is one function: + +// E → T E´ +std::unique_ptr< EASTnode > E() { + auto t( T( ) ); + if ( t ) { + auto e2( E2ndHalf( ) ); + return std::make_unique< EASTnode >( std::move( t ), std::move( e2 ) ); + }; // else + return nullptr; +}; // end E() + +// T → F T´ +std::unique_ptr< TASTnode > T() { + auto f = F( ); + if ( f ) { + auto t2 = T2ndHalf( ); + return std::make_unique< TASTnode >( std::move( f ), std::move( t2 )); + }; // else + return nullptr; +}; // end F() + + +// E´ → + T E´ | - T E´ | ε +std::unique_ptr< E2ndASTnode > E2ndHalf() { + bool adder = true; // until proven false + switch ( lex::next_token.first ) { + case lex::tok_minus : + adder = false; + // fall through (no break!) + case lex::tok_plus : + lex::advance_token( ); // eat + + { auto t = T( ); + if ( t ) { + auto e2 = E2ndHalf( ); + return std::make_unique< E2ndASTnode >( adder, + std::move( t ), + std::move( e2 )); + }; // else if t is null + return nullptr; + }; // end t's scope + + default : // not an error, epsilon !! + return nullptr; + + }; +} // end E2ndHalf + +// T´ → * F T´ | / F T´ | ε +std::unique_ptr< T2ndASTnode > T2ndHalf() { + bool mult = true; // until proven false + switch ( lex::next_token.first ) { + case lex::tok_div : + mult = false; + // fall through (no break!) + case lex::tok_times : + lex::advance_token( ); // eat + + { auto f = F( ); + if ( f ) { + auto t2 = T2ndHalf( ); + return std::make_unique< T2ndASTnode >( mult, + std::move( f ), + std::move( t2 )); + }; // else if t is null + return nullptr; + }; // end f's scope + + default : // not an error, epsilon !! 
+ return nullptr; + + }; +} // end T2ndHalf + +// F → ( E ) | num +std::unique_ptr< FASTnode > F() { + switch ( lex::next_token.first ) { + case lex::tok_lparen : + lex::advance_token( ); // eat lparen + { auto e = E(); + if ( lex::tok_rparen == lex::next_token.first ) { + lex::advance_token( ); // eat rparen + return std::make_unique< FASTnode >( std::move( e )); + }; + // else if rparen not found + lex::printErrorMsg( "Expected Right Parenthesis" ); + return nullptr; + }; // end e's scope + + case lex::tok_number : + { auto n = std::get< lex::numberType >( lex::next_token.second ); + lex::advance_token( ); // eat id + return std::make_unique< FASTnode >( n ); + }; // end n's scope + + default : + lex::printErrorMsg( "Expected Left Parenthesis or number" ); + return nullptr; + }; // end switch + assert( false ); // if we ever get here, something's very wrong +} + + +} // end namespace parse diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/parser.h b/recursiveDescentParsers/cplusplus/halfBakedCompiler/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..af54431e6d13148bc75e69850993acb4f7257aec --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/parser.h @@ -0,0 +1,33 @@ +// This code made available to his students by +// Prof. Ronald Moore +// https://fbi.h-da.de/personen/ronald-moore/ +// mailto:ronald.moore@h-da.de +// with no warranties whatsoever! + +#pragma once + +#include "astree.h" // chain includes lexer.h + +namespace parse { + +// PARSING!!! +// =========== +// +// The grammar we are going to parse here is: +// Grammar: +// E → T E´ +// E´ → + T E´ | - T E´ | ε +// T → F T´ +// T´ → * F T´ | / F T´ | ε +// F → ( E ) | num +// Note that the recursive descent function for (e.g.) 
E´
+// is named "E2ndHalf".
+
+// Forward Declarations
+std::unique_ptr< EASTnode > E();
+std::unique_ptr< E2ndASTnode > E2ndHalf();
+std::unique_ptr< TASTnode > T();
+std::unique_ptr< T2ndASTnode > T2ndHalf();
+std::unique_ptr< FASTnode > F();
+
+} // end namespace parse
diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/Makefile b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..90989ce20a57403f58233c6fd174c96614a5066c
--- /dev/null
+++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/Makefile
@@ -0,0 +1,106 @@
+# This Makefile made available to his students by
+# Prof. Ronald Moore
+# https://fbi.h-da.de/personen/ronald-moore/
+# mailto:ronald.moore@h-da.de
+# with no warranties whatsoever
+
+
+## More preliminaries
+# See https://www.gnu.org/software/make/manual/html_node/Special-Targets.html
+# In this makefile, we want to keep going even if we find errors
+.IGNORE :
+
+.NOTPARALLEL :
+
+## Define test collections - each line should contain one or more filenames
+GOOD-INPUTS := goodTest.input
+BAD-INPUTS := badTest.input
+
+# the sum of all tests
+INPUTS := $(GOOD-INPUTS) $(BAD-INPUTS)
+
+# reference files ("correct" output)
+REFERENCES := $(INPUTS:.input=.reference)
+
+# output files
+OUTPUTS := $(INPUTS:.input=.output)
+
+# Program we'll be testing
+PROGRAM := ../halfbaker
+
+# Tell make that the following "targets" are "phony"
+# Cf. https://www.gnu.org/software/make/manual/html_node/Phony-Targets.html#Phony-Targets
+.PHONY : all check clean testclean tests updatetests goodtests badtests
+
+## Now, the targets -- the things that will get made!
+
+all: tests
+
+# For convenience, "make check" == "make tests"
+check: tests
+
+# TESTING
+# Calling "make tests" should
+# (1) make the program (if necessary)
+# (2) erase all the test outputs
+# (3) create them again (see rules, below)
+# (4) compare each output against its reference file.
+tests: $(PROGRAM)
+	cd .. && $(MAKE)
+	$(MAKE) clean
+	$(MAKE) updatetests
+
+updatetests: $(OUTPUTS)
+
+$(PROGRAM) :
+# Every recipe line runs in its own shell, so the cd must be chained to the $(MAKE) call.
+	cd .. && $(MAKE)
+
+# "make clean" or equivalently "make testclean" deletes all files created by testing
+clean: testclean
+
+testclean:
+	$(RM) -fv *.output *~
+
+# By the way, for more information about calling make from make, see
+# https://www.gnu.org/software/make/manual/html_node/Recursion.html#Recursion
+
+# User-defined function (Cf. http://oreilly.com/catalog/make3/book/ch04.pdf)
+# Takes two arguments - an output file and a reference output file
+# (the two files should be the same). Function prints "success" or "failure",
+# depending on whether the two files are equal.
+#define testReferenceOutput
+# @cmp -s $1 $2 \
+# && /bin/echo -e "Test against $2 successful" \
+# || /bin/echo -e "\n\tTest against $2 FAILED!!!\n"
+#endef
+
+#### If your terminal supports colors, comment out the version above,
+# and use this version instead - good tests have a green background,
+# failed tests have a red background. See
+# <http://misc.flogisoft.com/bash/tip_colors_and_formatting>
+# for information about output formatting (colors, bold, etc.)).
+
+# (an older variant compared with "cmp -s" instead of "diff -qs")
+define testReferenceOutput
+	@diff -qs $1 $2 \
+	&& /bin/echo -e "\e[1;42mTest against $2 successful.\e[0m" \
+	|| /bin/echo -e "\e[1;41mTest against $2 FAILED!!! \e[0m"
+endef
+
+# Reminder...
+# $@ = The filename representing the target.
+# $< = The filename of the first prerequisite.
+# $* = The stem of the target (i.e. without .o, .cpp...)
+# $(*F) = The stem of the target (i.e. without .o, .cpp... AND without directory)
+# $^ = The names of all the prerequisites, with spaces between them.
+
+
+################ Reference Tests ###################
+# For each file x.input, we have stored the "correct" (expected) output in
+# x.reference; the program's actual output (stdout and stderr) goes to x.output.
+$(OUTPUTS): %.output: %.input %.reference
+	$(PROGRAM) $< >$@ 2>&1
+	$(call testReferenceOutput,$@, $*.reference)
+
+# Finished!
diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/badTest.input b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/badTest.input new file mode 100644 index 0000000000000000000000000000000000000000..99faffe929d271a664703888821ac01a3bf91828 --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/badTest.input @@ -0,0 +1,8 @@ +42? +42^2 +(42)) +((42 +(42 / 0) +(42 / (1-1)) +42 +/- 3.14159 + diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/badTest.reference b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/badTest.reference new file mode 100644 index 0000000000000000000000000000000000000000..107f82c1383fee2a11f4604bce221d63cbcb368b --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/badTest.reference @@ -0,0 +1,176 @@ + +======================= +Reading from badTest.input (0,2): +42? +--^ +Compiler built this tree: +E ++--> T +| +--> F = number 42 +| +--> nullptr child ++--> nullptr child +badTest.input (0,3): +42? +---^ +ERROR : Unrecognized character(s)=? + +======================= +Reading from badTest.input (1,2): +42^2 +--^ +Compiler built this tree: +E ++--> T +| +--> F = number 42 +| +--> nullptr child ++--> nullptr child +badTest.input (1,3): +42^2 +---^ +ERROR : Unrecognized character(s)=^ + +======================= +Reading from badTest.input (1,4): +42^2 +----^ +Compiler built this tree: +E ++--> T +| +--> F = number 2 +| +--> nullptr child ++--> nullptr child + +======================= +Reading from badTest.input (2,1): +(42)) +-^ +Compiler built this tree: +E ++--> T +| +--> F = = parenthetical expression +| | +--> E +| | | +--> T +| | | | +--> F = number 42 +| | | | +--> nullptr child +| | | +--> nullptr child +| +--> nullptr child ++--> nullptr child + +======================= +Reading from badTest.input (2,5): +(42)) +-----^ +badTest.input (2,5): +(42)) +-----^ +ERROR : Expected Left Parenthesis or number +Compiler built empty tree!?! 
+ +======================= +Reading from badTest.input (3,1): +((42 +-^ +badTest.input (4,1): +(42 / 0) +-^ +ERROR : Expected Right Parenthesis +badTest.input (4,3): +(42 / 0) +---^ +ERROR : Expected Right Parenthesis +Compiler built empty tree!?! + +======================= +Reading from badTest.input (4,5): +(42 / 0) +-----^ +badTest.input (4,5): +(42 / 0) +-----^ +ERROR : Expected Left Parenthesis or number +Compiler built empty tree!?! + +======================= +Reading from badTest.input (4,7): +(42 / 0) +-------^ +Compiler built this tree: +E ++--> T +| +--> F = number 0 +| +--> nullptr child ++--> nullptr child + +======================= +Reading from badTest.input (4,8): +(42 / 0) +--------^ +badTest.input (4,8): +(42 / 0) +--------^ +ERROR : Expected Left Parenthesis or number +Compiler built empty tree!?! + +======================= +Reading from badTest.input (5,1): +(42 / (1-1)) +-^ +Compiler built this tree: +E ++--> T +| +--> F = = parenthetical expression +| | +--> E +| | | +--> T +| | | | +--> F = number 42 +| | | | +--> T' divider +| | | | | +--> F = = parenthetical expression +| | | | | | +--> E +| | | | | | | +--> T +| | | | | | | | +--> F = number 1 +| | | | | | | | +--> nullptr child +| | | | | | | +--> E' subtracter +| | | | | | | | +--> T +| | | | | | | | | +--> F = number 1 +| | | | | | | | | +--> nullptr child +| | | | | | | | +--> nullptr child +| | | | | +--> nullptr child +| | | +--> nullptr child +| +--> nullptr child ++--> nullptr child + +======================= +Reading from badTest.input (6,2): +42 +/- 3.14159 +--^ +badTest.input (6,5): +42 +/- 3.14159 +-----^ +ERROR : Expected Left Parenthesis or number +Compiler built this tree: +E ++--> T +| +--> F = number 42 +| +--> nullptr child ++--> nullptr child + +======================= +Reading from badTest.input (6,6): +42 +/- 3.14159 +------^ +badTest.input (6,6): +42 +/- 3.14159 +------^ +ERROR : Expected Left Parenthesis or number +Compiler built empty tree!?! 
+ +======================= +Reading from badTest.input (6,14): +42 +/- 3.14159 +--------------^ +Compiler built this tree: +E ++--> T +| +--> F = number 3.14159 +| +--> nullptr child ++--> nullptr child +End Of File! diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/goodTest.input b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/goodTest.input new file mode 100644 index 0000000000000000000000000000000000000000..7c49594be4b299e555ffbe7865109c33dcbf9497 --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/goodTest.input @@ -0,0 +1,11 @@ +42 +40 + 2 +(44.44 - 2.44) +(21 * 2.0) +(84.0 / (3-1)) +88 / 2 - 2 +88 / (8 / 4) - 2 +44.0 - 2 +(4 * 11.0) - 2 +(8 * 11.0) / 2 - 2 +(8 * 11.0) / (8 / 4) - 2 diff --git a/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/goodTest.reference b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/goodTest.reference new file mode 100644 index 0000000000000000000000000000000000000000..39c5a18ab01f14087261197a86e8df39c01bf162 --- /dev/null +++ b/recursiveDescentParsers/cplusplus/halfBakedCompiler/tests/goodTest.reference @@ -0,0 +1,225 @@ + +======================= +Reading from goodTest.input (0,2): +42 +--^ +Compiler built this tree: +E ++--> T +| +--> F = number 42 +| +--> nullptr child ++--> nullptr child + +======================= +Reading from goodTest.input (1,2): +40 + 2 +--^ +Compiler built this tree: +E ++--> T +| +--> F = number 40 +| +--> nullptr child ++--> E' adder +| +--> T +| | +--> F = number 2 +| | +--> nullptr child +| +--> nullptr child + +======================= +Reading from goodTest.input (2,1): +(44.44 - 2.44) +-^ +Compiler built this tree: +E ++--> T +| +--> F = = parenthetical expression +| | +--> E +| | | +--> T +| | | | +--> F = number 44.44 +| | | | +--> nullptr child +| | | +--> E' subtracter +| | | | +--> T +| | | | | +--> F = number 2.44 +| | | | | +--> nullptr child +| | | | +--> nullptr child +| +--> nullptr child ++--> nullptr child + 
+======================= +Reading from goodTest.input (3,1): +(21 * 2.0) +-^ +Compiler built this tree: +E ++--> T +| +--> F = = parenthetical expression +| | +--> E +| | | +--> T +| | | | +--> F = number 21 +| | | | +--> T' multiplier +| | | | | +--> F = number 2 +| | | | | +--> nullptr child +| | | +--> nullptr child +| +--> nullptr child ++--> nullptr child + +======================= +Reading from goodTest.input (4,1): +(84.0 / (3-1)) +-^ +Compiler built this tree: +E ++--> T +| +--> F = = parenthetical expression +| | +--> E +| | | +--> T +| | | | +--> F = number 84 +| | | | +--> T' divider +| | | | | +--> F = = parenthetical expression +| | | | | | +--> E +| | | | | | | +--> T +| | | | | | | | +--> F = number 3 +| | | | | | | | +--> nullptr child +| | | | | | | +--> E' subtracter +| | | | | | | | +--> T +| | | | | | | | | +--> F = number 1 +| | | | | | | | | +--> nullptr child +| | | | | | | | +--> nullptr child +| | | | | +--> nullptr child +| | | +--> nullptr child +| +--> nullptr child ++--> nullptr child + +======================= +Reading from goodTest.input (5,2): +88 / 2 - 2 +--^ +Compiler built this tree: +E ++--> T +| +--> F = number 88 +| +--> T' divider +| | +--> F = number 2 +| | +--> nullptr child ++--> E' subtracter +| +--> T +| | +--> F = number 2 +| | +--> nullptr child +| +--> nullptr child + +======================= +Reading from goodTest.input (6,2): +88 / (8 / 4) - 2 +--^ +Compiler built this tree: +E ++--> T +| +--> F = number 88 +| +--> T' divider +| | +--> F = = parenthetical expression +| | | +--> E +| | | | +--> T +| | | | | +--> F = number 8 +| | | | | +--> T' divider +| | | | | | +--> F = number 4 +| | | | | | +--> nullptr child +| | | | +--> nullptr child +| | +--> nullptr child ++--> E' subtracter +| +--> T +| | +--> F = number 2 +| | +--> nullptr child +| +--> nullptr child + +======================= +Reading from goodTest.input (7,4): +44.0 - 2 +----^ +Compiler built this tree: +E ++--> T +| +--> F = number 44 +| +--> nullptr 
child ++--> E' subtracter +| +--> T +| | +--> F = number 2 +| | +--> nullptr child +| +--> nullptr child + +======================= +Reading from goodTest.input (8,1): +(4 * 11.0) - 2 +-^ +Compiler built this tree: +E ++--> T +| +--> F = = parenthetical expression +| | +--> E +| | | +--> T +| | | | +--> F = number 4 +| | | | +--> T' multiplier +| | | | | +--> F = number 11 +| | | | | +--> nullptr child +| | | +--> nullptr child +| +--> nullptr child ++--> E' subtracter +| +--> T +| | +--> F = number 2 +| | +--> nullptr child +| +--> nullptr child + +======================= +Reading from goodTest.input (9,1): +(8 * 11.0) / 2 - 2 +-^ +Compiler built this tree: +E ++--> T +| +--> F = = parenthetical expression +| | +--> E +| | | +--> T +| | | | +--> F = number 8 +| | | | +--> T' multiplier +| | | | | +--> F = number 11 +| | | | | +--> nullptr child +| | | +--> nullptr child +| +--> T' divider +| | +--> F = number 2 +| | +--> nullptr child ++--> E' subtracter +| +--> T +| | +--> F = number 2 +| | +--> nullptr child +| +--> nullptr child + +======================= +Reading from goodTest.input (10,1): +(8 * 11.0) / (8 / 4) - 2 +-^ +Compiler built this tree: +E ++--> T +| +--> F = = parenthetical expression +| | +--> E +| | | +--> T +| | | | +--> F = number 8 +| | | | +--> T' multiplier +| | | | | +--> F = number 11 +| | | | | +--> nullptr child +| | | +--> nullptr child +| +--> T' divider +| | +--> F = = parenthetical expression +| | | +--> E +| | | | +--> T +| | | | | +--> F = number 8 +| | | | | +--> T' divider +| | | | | | +--> F = number 4 +| | | | | | +--> nullptr child +| | | | +--> nullptr child +| | +--> nullptr child ++--> E' subtracter +| +--> T +| | +--> F = number 2 +| | +--> nullptr child +| +--> nullptr child +End Of File!