Commit 1e561222 authored by Ronald Charles Moore's avatar Ronald Charles Moore
Browse files

lexer done

parent 0f3fa0ff
......@@ -2,59 +2,158 @@
// Prof. Ronald Moore
// https://fbi.h-da.de/personen/ronald-moore/
// mailto:ronald.moore@h-da.de
// with no warranties whatsoever
//
// The grammar we are going to parse here is:
// Grammar:
// E → T E´
// E´ → + T E´ | - T E´ | ε
// T → F T´
// T´ → * F T´ | / F T´ | ε
// F → ( E ) | num
// where the following are taken to be tokens:
// with no warranties whatsoever!
#include <cassert>
#include <cctype> // for isspace
#include <cstdlib> // for strtod
#include <iostream>
#include <fstream>
#include <string>
// include <vector>
// ===================
// LEXICAL ANALYSIS
// The following are taken to be tokens:
// left and right parenthesis, the plus and minus characters,
// as well as asterisk and forward slash -- and numbers.
// In the script, subtraction and division are not supported,
// but it seems like time to add them.
//
// Note that the recursive descent function for (e.g.) E´
// is named "E2ndHalf".
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
// Preliminaries and Utilities
// ============================
// Global lexer position state -- plain globals, complain if you must.
std::string currentLine;         // default-constructed, i.e. empty
int currentLineNumber{ 0 };
int currentColumnNumber{ 0 };
int currentTokenLength{ 0 };
// Utility Types

// Numeric type used for number values; feel free to change this to
// something else like int or float or bigint....
using numberType = double;

// Token kinds. Every enumerator is given a character value, so a Token
// can be cast to char (e.g. for printing).
enum Token {
    tok_lparen = '(',
    tok_rparen = ')',
    tok_plus   = '+',
    tok_minus  = '-',
    tok_times  = '*',
    tok_div    = '/',
    tok_number = 'n',
    tok_eof    = 'E',
    bad_tok    = 'X'
};
// File-local global state -- yes, globals; take it up with the author.
static std::istream *input = &std::cin;   // std::cin until pointed elsewhere
static std::string currentLine;           // default-constructed, i.e. empty
static int currentLineNumber{ -1 };
static int currentColumnNumber{ 0 };
static int currentTokenLength{ 0 };
// The token kinds, numbered consecutively from zero.
enum Token {
    tok_number,
    tok_lparen,
    tok_rparen,
    tok_plus,
    tok_minus,
    tok_times,
    tok_div
};

// Global holding a token -- again with the global variables...
Token next_token;
// More file-local globals: a token slot and a number slot.
// Both are zero-initialised.
static Token next_token{};
static numberType currentNumber{};
// The Lexer
// ==========
// Advance the read position past white space, fetching further lines from
// *input whenever the current line is used up.
//
// Reads and writes the globals currentLine, currentLineNumber and
// currentColumnNumber.
//
// Returns true as soon as currentLine[ currentColumnNumber ] is a
// non-space character. Returns false when std::getline() leaves the stream
// in a failed state (normally: end of input); in that case the line counter
// has still been incremented and the column reset to 0.
static bool skippedWhiteSpace( ) {
    while ( true ) {
        const int currentLineLength = static_cast<int>( currentLine.length() );
        while ( currentColumnNumber < currentLineLength ) {
            // isspace() is only defined for EOF and values representable as
            // unsigned char; a plain (possibly negative) char is undefined
            // behaviour, so convert first.
            const unsigned char ch =
                static_cast<unsigned char>( currentLine[ currentColumnNumber ] );
            if ( ! isspace( ch ) )
                return true;
            currentColumnNumber++;
        }
        // If we're here, we're at the end of a line: fetch the next one.
        std::getline( *input, currentLine );
        currentLineNumber++;
        currentColumnNumber = 0;
        if ( ! *input ) // EOF (or read error)
            return false;
        // else, repeat -- equivalent to the tail call
        // return skippedWhiteSpace();
    }
}
// Return the next token from *input.
//
// Results:
//   bad_tok     the stream is already in a failed state on entry, or the
//               text at the read position is neither an operator,
//               a parenthesis, nor something strtod() can parse
//   tok_eof     skippedWhiteSpace() reported that no more text is available
//   tok_number  strtod() parsed a value; it is stored in currentNumber
//   otherwise   the token for the single character ( ) + - * /
//
// currentColumnNumber is advanced past the consumed text; for bad_tok caused
// by unparsable text it is advanced by exactly one character.
static Token gettok( ) {
    assert( nullptr != input );
    if ( input->fail() ) return bad_tok;

    // Skip white space, moving on to following lines as necessary.
    if ( ! skippedWhiteSpace( ) ) return tok_eof;

    // There is visible text in front of us; tentatively consume one character.
    const int tokenStart = currentColumnNumber;
    assert( 0 <= tokenStart );
    currentColumnNumber = tokenStart + 1;

    switch ( currentLine[ tokenStart ] ) {
        case '(' : return tok_lparen;
        case ')' : return tok_rparen;
        case '+' : return tok_plus;
        case '-' : return tok_minus;
        case '*' : return tok_times;
        case '/' : return tok_div;
        default  : break; // a number, or nothing we recognise
    }

    // Let strtod() decide whether a number starts at the token's first char.
    char *numberBegin = &currentLine[ tokenStart ];
    char *numberEnd = nullptr; // set by strtod()
    const double parsedValue = std::strtod( numberBegin, &numberEnd );
    if ( numberEnd == numberBegin )
        return bad_tok; // nothing was parsed

    currentNumber = parsedValue; // let C++ do the conversion
    // Move to just behind the number, counted from where the token began.
    currentColumnNumber = tokenStart + static_cast<int>( numberEnd - numberBegin );
    return tok_number;
} // end gettok
// PARSING!!!
// ===========
//
// The grammar we are going to parse here is:
// Grammar:
// E → T E´
// E´ → + T E´ | - T E´ | ε
// T → F T´
// T´ → * F T´ | / F T´ | ε
// F → ( E ) | num
// Note that the recursive descent function for (e.g.) E´
// is named "E2ndHalf".
// main (!)
// =========
// Program entry point: dump the token stream of the input.
//
// Takes exactly one command-line argument: the name of the file to read,
// or "-" to leave `input` as it is instead of opening a file.
// Prints one "TOKEN = ..." line per token returned by gettok() until
// tok_eof is seen, then prints "EOF".
//
// Returns 0 on success, -1 if the argument count is wrong or the named
// file cannot be opened.
int main( int argc, char **argv ) {
    if ( 2 != argc ) {
        std::cerr << "Usage: " << argv[0] << " <fileName>.\n"
                  << "You provided " << argc-1 << " arguments, we take exactly one (only).\n";
        return -1;
    }

    // else 2 == argc, so argv[1] is the file name
    const std::string fileName( argv[1] );
    if ( "-" != fileName ) {
        // static, because `input` keeps pointing at this stream after the
        // enclosing block has ended.
        static std::ifstream ifs( fileName );
        if ( ! ifs ) {
            // Without this check gettok() would return bad_tok on every call
            // and the loop below would never reach tok_eof.
            std::cerr << "Error: cannot open '" << fileName << "' for reading.\n";
            return -1;
        }
        input = &ifs;
    }

    // Prime the pump!
    next_token = gettok( );

    // get tokens and dump them...
    while ( tok_eof != next_token ) {
        std::cout << "TOKEN = " << static_cast<char>( next_token )
                  << " current num = " << currentNumber
                  << std::endl;
        next_token = gettok( );
    }
    std::cout << "EOF" << std::endl;

    return 0; // all good
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment