diff --git a/recursiveDescentParsers/cplusplus/interpreter/interpreter b/recursiveDescentParsers/cplusplus/interpreter/interpreter index b4110dc560d39b33753aec8ba003afa3bec6f815..066ac63171237fe93a4d8312b1af2aa713881b14 100755 Binary files a/recursiveDescentParsers/cplusplus/interpreter/interpreter and b/recursiveDescentParsers/cplusplus/interpreter/interpreter differ diff --git a/recursiveDescentParsers/cplusplus/interpreter/interpreter.cpp b/recursiveDescentParsers/cplusplus/interpreter/interpreter.cpp index f954cb5b38ba6987839b14b031a89dd477bddb67..5996dd5844aad6e7105d34e19e394bfc535034c0 100644 --- a/recursiveDescentParsers/cplusplus/interpreter/interpreter.cpp +++ b/recursiveDescentParsers/cplusplus/interpreter/interpreter.cpp @@ -2,59 +2,158 @@ // Prof. Ronald Moore // https://fbi.h-da.de/personen/ronald-moore/ // mailto:ronald.moore@h-da.de -// with no warranties whatsoever -// -// The grammar we are going to parse here is: -// Grammar: -// E → T E´ -// E´ → + T E´ | - T E´ | ε -// T → F T´ -// T´ → * F T´ | / F T´ | ε -// F → ( E ) | num -// where the following are taken to be tokens: +// with no warranties whatsoever! + + +#include <cassert> +#include <cctype> // for isspace +#include <cstdlib> // for strtod +#include <iostream> +#include <fstream> +#include <string> +// include <vector> + +// =================== +// LEXICAL ANALYSIS +// The following are taken to be tokens: // left and right parenthesis, the plus and minus characters, // as well as asterisk and forward slash -- and numbers. // In the script, substraction and division are not supported, // but it seems like time to add them. -// -// Note that the recursive descent function for (e.g.) E´ -// is nameded "E2ndHalf"- - -#include <cstdio> -#include <cstdlib> -#include <string> -#include <vector> // Preliminaries and Utilities // ============================ -// global variables -- sue me if you don't like that! -std::string currentLine( "" ); -int currentLineNumber = 0; -int currentColumnNumber = 0; -int currentTokenLength = 0; // Utility Types typedef double numberType; // feel fee to change this to something else like int or float or bigint.... +typedef enum Token { + tok_number = 'n', + tok_lparen = '(', + tok_rparen = ')', + tok_plus = '+', + tok_minus = '-', + tok_times = '*', + tok_div = '/', + tok_eof = 'E', + bad_tok = 'X' +} Token; + +// global variables -- sue me if you don't like that! +static std::istream *input = &(std::cin); // until proven otherwise +static std::string currentLine( "" ); +static int currentLineNumber = -1; +static int currentColumnNumber = 0; +static int currentTokenLength = 0; -// the tokens -enum Token { - tok_number, - tok_lparen, - tok_rparen, - tok_plus, - tok_minus, - tok_times, - tok_div -} next_token; // again with the global variables... +static Token next_token; // again with the global variables... +static numberType currentNumber; // = zero.... // The Lexer // ========== +static bool skippedWhiteSpace( ) { // return true if not at EOF, i.e. if skipped + while ( true ) { + int currentLineLength = currentLine.length(); + while ( currentColumnNumber < currentLineLength ) + if ( isspace( currentLine[ currentColumnNumber ] ) ) + currentColumnNumber++; + else // if NOT isspace() + return true; + + // if we're here, we're at the end of a line. + std::getline( *input, currentLine ); + currentLineNumber++; + currentColumnNumber = 0; + if ( ! *input ) // EOF!! + return false; + // else, repeat! + // Which is the same as + // return skippedWhiteSpace() -- i.e. tail recursion. + }; +}; +static Token gettok( ) { + assert( input ); // we assume nullptr != input + if ( ! *input ) return bad_tok; + // else, we can read from input + // Skip white space, going to next line as necessary + if ( ! skippedWhiteSpace( ) ) return tok_eof; + + // We're have visible text in front of us. + char currentChar = currentLine[ currentColumnNumber ]; + currentColumnNumber++; // usually, but see num... + switch ( currentChar ) { + case '(' : return tok_lparen; + case ')' : return tok_rparen; + case '+' : return tok_plus; + case '-' : return tok_minus; + case '*' : return tok_times; + case '/' : return tok_div; + default : + // either we have a number in front of us, or we don't + assert( 0 < currentColumnNumber ); + char *alpha = &(currentLine[ currentColumnNumber-1 ]); + // minus one because we incremented it before the switch + char *omega = nullptr; // until we call strtod... + double tmpValue = strtod( alpha, &omega ); + if ( alpha == omega ) { + return bad_tok; // !!! + }; + // else if strtod found a real number (or at least a double) + currentNumber = tmpValue; // let C++ do the converison + currentColumnNumber += (omega - alpha) -1; + // minus one because we incremented it before the switch + return tok_number; + + }; // end switch + assert( false ); // we should never get here! + return bad_tok; +} // end gettok + +// PARSING!!! +// =========== +// +// The grammar we are going to parse here is: +// Grammar: +// E → T E´ +// E´ → + T E´ | - T E´ | ε +// T → F T´ +// T´ → * F T´ | / F T´ | ε +// F → ( E ) | num +// Note that the recursive descent function for (e.g.) E´ +// is nameded "E2ndHalf"- + + + + // main (!) // ========= int main( int argc, char **argv ) { + if (2 != argc) { + std::cerr << "Usage: " << argv[0] << " <fileName>.\n" + << "You provided " << argc-1 << " arguments, we take exactly one (only).\n"; + return( -1 ); + }; + // else if 1 == argc .... + std::string fileName( argv[1] ); + if ( "-" != fileName ) { + static std::ifstream ifs( fileName ); + input = &ifs; + } + + // Prime the pump! + next_token = gettok( ); + + // get tokens and dump them... + while ( tok_eof != next_token ) { + std::cout << "TOKEN = " << (char)next_token + << " current num = " << currentNumber + << std::endl; + next_token = gettok(); + }; + std::cout << "EOF" << std::endl; + return 0; // Alles klar!!! }