diff --git a/flexBison/cplusplus/calc++.cc b/flexBison/cplusplus/calc++.cc index 7c8440c1d46ed927857bf3082702cdfe23b06517..311e4eb67af41fda066cb8d9259029cfd8f13565 100644 --- a/flexBison/cplusplus/calc++.cc +++ b/flexBison/cplusplus/calc++.cc @@ -6,6 +6,8 @@ int main (int argc, char *argv[]) { + // we build an interpreter: the value of each evaluated input is printed below, + // while res is only the exit status of main (0 = all inputs parsed, 1 = a parse failed) int res = 0; driver drv; for (int i = 1; i < argc; ++i) @@ -14,8 +16,11 @@ main (int argc, char *argv[]) else if (argv[i] == std::string ("-s")) drv.trace_scanning = true; else if (!drv.parse (argv[i])) + // parse() returns 0 on success and non-zero if it ran into an error + // so with !drv.parse we are here if everything was fine std::cout << drv.result << '\n'; else + // error while parsing res = 1; return res; } diff --git a/flexBison/cplusplus/driver.cc b/flexBison/cplusplus/driver.cc index fa0450bd29dded44a0ac6fbaae10107c9747fd63..5f5e104401e8f66184d012c55b2b6f67937dd97b 100644 --- a/flexBison/cplusplus/driver.cc +++ b/flexBison/cplusplus/driver.cc @@ -5,6 +5,8 @@ driver::driver () : trace_parsing (false), trace_scanning (false) { + // These are hardcoded variables that are now available inside expressions you write + // e.g. 
'y := one + 1' variables["one"] = 1; variables["two"] = 2; } @@ -13,11 +15,11 @@ int driver::parse (const std::string &f) { file = f; - location.initialize (&file); + location.initialize (&file); // set location to beginning of file or stdin scan_begin (); - yy::parser parse (*this); + yy::parser parse (*this); // set the parsing context to *this (remember, we are inside the driver class) parse.set_debug_level (trace_parsing); - int res = parse (); + int res = parse (); // this parses and evaluates the file (or stdin, see scan_begin() definition at the end of scanner.ll) scan_end (); return res; } diff --git a/flexBison/cplusplus/driver.hh b/flexBison/cplusplus/driver.hh index 92a0e31b16b1f0d0f338a5cabd0141c5bc16f86c..8137cc3906e600fedf7bb04adba815ce5552513f 100644 --- a/flexBison/cplusplus/driver.hh +++ b/flexBison/cplusplus/driver.hh @@ -5,23 +5,30 @@ # include <map> # include "parser.hh" -// Give Flex the prototype of yylex we want ... +// Give Flex the prototype of yylex we want: +// because of %define api.value.type variant and %define api.token.constructor in parser.yy +// the parser defines the type symbol_type, and expects yylex to have the following prototype. # define YY_DECL \ yy::parser::symbol_type yylex (driver& drv) // ... and declare it for the parser's sake. +// yylex now returns a complete symbol, aggregating its type (i.e., the traditional value returned by yylex), +// its semantic value, and possibly its location. +// see https://www.gnu.org/software/bison/manual/html_node/Complete-Symbols.html YY_DECL; // Conducting the whole scanning and parsing of Calc++. +// This is the parsing context, an interface to the parser and scanner +// Contains all the data to exchange -> opens file to scan, instantiates the parser etc. 
class driver { public: driver (); - std::map<std::string, int> variables; + std::map<std::string, int> variables; // this holds the variables and their values we find on the way - int result; + int result; // the evaluated expression result - // Run the parser on file F. Return 0 on success. + // Run the parser on file F. Return 0 on success and 1 on failure. int parse (const std::string& f); // The name of the file being parsed. std::string file; @@ -29,11 +36,14 @@ public: bool trace_parsing; // Handling the scanner. + // These functions are defined at the end of scanner.ll void scan_begin (); void scan_end (); + // Whether to generate scanner debug traces. bool trace_scanning; - // The token's location used by the scanner. + // The token's location used by the scanner. + // made available by 'yy::location& loc = drv.location;' in scanner.ll yy::location location; }; #endif // ! DRIVER_HH diff --git a/flexBison/cplusplus/parser.yy b/flexBison/cplusplus/parser.yy index 188873b079ed7a8e0100155112aa82c4652e41fe..a517f967ba80dcf1a8361f03d96612cd568dc516 100644 --- a/flexBison/cplusplus/parser.yy +++ b/flexBison/cplusplus/parser.yy @@ -1,20 +1,30 @@ // See https://www.gnu.org/software/bison/manual/html_node/Calc_002b_002b-Parser.html +// needed for creation of parser.hh (C++ style) %skeleton "lalr1.cc" /* -*- C++ -*- */ %require "3.5" %defines +// In C we would define a %union to use as our symbol type +// In C++ and newer bison we can tell it to use a variant (Bison's own, not std::variant) for the semantic values +// We declare our intention to use it with %define api.token.constructor %define api.value.type variant %define parse.assert +// and then just use whatever types we want. 'parse.assert' makes sure we use the right types in symbols %code requires { + // place your includes here # include <string> class driver; } -// The parsing context. +// The parsing context is passed to flex and bison. 
+// Equivalent to both %lex-param and %parse-param +// You could also define 'int yylex (driver& drv)' and 'int yyparse (driver& drv);' %param { driver& drv } +// request location tracking +// made available by 'yy::location& loc = drv.location;' in scanner.ll %locations %define parse.trace @@ -24,6 +34,7 @@ # include "driver.hh" } +// DRY - the generated token names get the prefix "TOK_" while the grammar keeps the short names %define api.token.prefix {TOK_} %token END 0 "end of file" @@ -36,29 +47,45 @@ RPAREN ")" ; +// these are our variant types +// bison generates functions like +// 'symbol_type make_IDENTIFIER (const std::string&, const location_type&);' for us; for NUMBER the generated function takes an int, and scanner.ll declares its own string-taking make_NUMBER helper %token <std::string> IDENTIFIER "identifier" %token <int> NUMBER "number" +// declare nonterminal expression +// compare to %type: https://www.gnu.org/software/bison/manual/html_node/Type-Decl.html +// tl;dr: use %nterm explicitly for nonterminals; %type can also be used for terminals +// <int>: an exp is evaluated to an int +// replace e.g. with AST node types %nterm <int> exp +// unit etc. 
do not need to be declared here, because they have no type to be evaluated to, +// as they only group other symbols and set no value of their own -%printer { yyo << $$; } <*>; +%printer { yyo << $$; } <*>; // print values using their operator<< %% -%start unit; -unit: assignments exp { drv.result = $2; }; +%start unit; // this is our root node, so to speak +unit: assignments exp { drv.result = $2; }; +// here we store the final value of exp in the result member of our driver +// so that it is still available once parsing is done +// imagine you could also put a pointer to the root node of your AST here :) assignments: %empty {} | assignments assignment {}; assignment: - "identifier" ":=" exp { drv.variables[$1] = $3; }; + "identifier" ":=" exp { drv.variables[$1] = $3; }; + // we save our variable inside the driver because we want to store it for later use +// you will find the following every time mathematical expressions are used +// it declares operator precedence; see here https://www.gnu.org/software/bison/manual/html_node/Precedence-Decl.html %left "+" "-"; %left "*" "/"; exp: "number" | "identifier" { $$ = drv.variables[$1]; } -| exp "+" exp { $$ = $1 + $3; } +| exp "+" exp { $$ = $1 + $3; } // these are real mathematical expressions being evaluated and stored back in exp ($$) | exp "-" exp { $$ = $1 - $3; } | exp "*" exp { $$ = $1 * $3; } | exp "/" exp { $$ = $1 / $3; } diff --git a/flexBison/cplusplus/scanner.ll b/flexBison/cplusplus/scanner.ll index 9cf9417b3cffa53d4ee32a074baf5885fb1978d2..39d2ef70d8e8232262255f6abd49f43599ec1fff 100644 --- a/flexBison/cplusplus/scanner.ll +++ b/flexBison/cplusplus/scanner.ll @@ -78,6 +78,9 @@ // A number symbol corresponding to the value in S. yy::parser::symbol_type make_NUMBER (const std::string &s, const yy::parser::location_type& loc); + // Note: the symbol_type is defined in parser.yy by declaring + // %define api.token.constructor and following lines. 
See explanation there or + // https://www.gnu.org/software/bison/manual/html_node/Complete-Symbols.html %} id [a-zA-Z][a-zA-Z_0-9]* @@ -86,13 +89,17 @@ blank [ \t\r] %{ // Code run each time a pattern is matched. + // The macro YY_USER_ACTION can be defined to provide an action which is always executed prior to the matched rule's action. + // http://dinosaur.compilertools.net/flex/flex_14.html + // yyleng is a lex global variable that is always the length of the token you just read in # define YY_USER_ACTION loc.columns (yyleng); %} %% %{ // A handy shortcut to the location held by the driver. yy::location& loc = drv.location; - // Code run each time yylex is called. + // Code run each time yylex is called. + // The lexical analyzer function, yylex, recognizes tokens from the input stream and returns them to the parser. loc.step (); %} {blank}+ loc.step (); @@ -106,8 +113,8 @@ blank [ \t\r] ")" return yy::parser::make_RPAREN (loc); ":=" return yy::parser::make_ASSIGN (loc); -{int} return make_NUMBER (yytext, loc); -{id} return yy::parser::make_IDENTIFIER (yytext, loc); +{int} return make_NUMBER (yytext, loc); // this is the scanner's own make_NUMBER helper defined below, not the bison generated yy::parser::make_NUMBER +{id} return yy::parser::make_IDENTIFIER (yytext, loc); // this function is generated by bison . { throw yy::parser::syntax_error (loc, "invalid character: " + std::string(yytext)); @@ -115,6 +122,7 @@ blank [ \t\r] <<EOF>> return yy::parser::make_END (loc); %% +// our own string-taking make_NUMBER helper (a free function, separate from the bison generated yy::parser::make_NUMBER), because we want to add some functionality yy::parser::symbol_type make_NUMBER (const std::string &s, const yy::parser::location_type& loc) {