lexer.cpp 4.24 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// This code made available to his students by 
// Prof. Ronald Moore  
//     https://fbi.h-da.de/personen/ronald-moore/  
//     mailto:ronald.moore@h-da.de
// with no warranties whatsoever!

#include "lexer.h"

#include <cassert>
#include <cctype> // for isspace
#include <cstdlib> // for strtod
#include <iostream>
#include <fstream>

namespace lex {  // continue to define things in lex::
	
// Definition of lex::next_token (presumably declared extern in lexer.h --
// TODO confirm); this translation unit provides its storage.
lex::Token next_token;

// File-local lexer state. All of it has internal linkage and is shared by
// the functions defined further down in this file.
static std::string   inputSourceName = "standard input"; // name printed in location messages
static std::istream *input = &std::cin;                  // stream being read; std::cin by default

static std::string   currentLine;             // text of the line being scanned (starts empty)
static int           currentLineNumber = -1;  // bumped every time a new line is fetched
static int           currentColumnNumber = 0; // index into currentLine of the next unread char

// Namespace "member functions"
// ============================

// Writes the current input position to std::cout as three lines:
//   1. "<source name> (<line>,<column>):"
//   2. the text of the current line
//   3. a run of '-' characters, one per column, followed by '^'
void printInputLocation( ) {
	// Position header.
	std::cout << inputSourceName << " (" << currentLineNumber << ','
	          << currentColumnNumber << "):" << std::endl;

	// Echo the line being scanned.
	std::cout << currentLine << std::endl;

	// Build the marker in one go; a non-positive column yields no dashes.
	const std::size_t dashCount =
		( currentColumnNumber > 0 ) ? static_cast<std::size_t>( currentColumnNumber ) : 0;
	std::string marker( dashCount, '-' );
	marker += '^';
	std::cout << marker << std::endl;
} // end printInputLocation

// Reports an error at the current input position on std::cout, then calls
// advance_token() so the caller does not stay stuck at the same spot.
//   Error -- the message text printed after the "ERROR : " prefix.
void printErrorMsg( const std::string Error )
{
	// Where are we?
	printInputLocation( );

	// What went wrong?
	std::cout << "ERROR : ";
	std::cout << Error << std::endl;

	// Move on.
	advance_token( );
} // end printErrorMsg

// Utility skippedWhiteSpace... 
// Advances currentColumnNumber past white space, fetching further lines from
// *input whenever the current line is exhausted.
//
// Returns true  when currentLine[ currentColumnNumber ] is a non-space char;
//         false when reading the next line left the stream in a failed state
//               (i.e. no more input).
//
// Side effects: may replace currentLine, increments currentLineNumber for
// every std::getline call (including the one that fails), and resets
// currentColumnNumber to 0 after each fetch.
static bool skippedWhiteSpace( ) {
	while ( true ) {
		const int currentLineLength = static_cast<int>( currentLine.length() );
		while ( currentColumnNumber < currentLineLength ) {
			// isspace() has undefined behaviour for negative arguments other
			// than EOF, so a plain (possibly signed) char must be converted
			// to unsigned char first -- matters for any non-ASCII byte.
			const unsigned char ch =
				static_cast<unsigned char>( currentLine[ currentColumnNumber ] );
			if ( ! std::isspace( ch ) )
				return true;
			currentColumnNumber++;
		}

		// If we're here, we're at the end of a line: fetch the next one.
		std::getline( *input, currentLine );
		currentLineNumber++;
		currentColumnNumber = 0;
		if ( ! *input ) // EOF (or read error)
			return false;
		// else loop again on the freshly read line.
	}
}

// Reads and returns the next token from the current input.
//
// Result (first = token kind, second = payload):
//   - bad_tok with '\0'          if *input is already in a failed state
//   - tok_eof                    if no visible text remains
//   - tok_lparen / tok_rparen / tok_plus / tok_minus / tok_times / tok_div
//                                for the characters ( ) + - * / ; the payload
//                                is the character itself
//   - tok_number                 if strtod can parse a number at the current
//                                position; the payload is the parsed value
//   - bad_tok                    otherwise; the payload is the offending
//                                character, which is skipped
// currentColumnNumber is left just past whatever was consumed.
Token gettok( ) {
	assert( input ); // the stream pointer itself must never be null

	// Start pessimistic; the kind is overwritten once something is recognised.
	Token tok( bad_tok, '\0' );

	if ( ! *input )
		return tok; // nothing can be read from a failed stream

	// Move to the next visible character, pulling in new lines as needed.
	if ( ! skippedWhiteSpace( ) ) {
		tok.first = lex::tok_eof;
		return tok;
	}

	// Visible text is in front of us: take one character provisionally.
	const int  startColumn = currentColumnNumber;
	const char ch = currentLine[ startColumn ];
	tok.second = ch;                       // replaced below for numbers
	currentColumnNumber = startColumn + 1; // corrected below for numbers

	// Single-character tokens are complete at this point.
	switch ( ch ) {
		case '(' : tok.first = tok_lparen; return tok;
		case ')' : tok.first = tok_rparen; return tok;
		case '+' : tok.first = tok_plus;   return tok;
		case '-' : tok.first = tok_minus;  return tok;
		case '*' : tok.first = tok_times;  return tok;
		case '/' : tok.first = tok_div;    return tok;
		default  : break; // maybe a number -- see below
	}

	// Either a number starts at startColumn, or the character is unknown.
	assert( 0 < currentColumnNumber ); // it was advanced above
	char *numberBegin = &(currentLine[ startColumn ]);
	char *numberEnd = nullptr; // strtod sets this to the first char after the number
	const double value = strtod( numberBegin, &numberEnd );

	if ( numberBegin == numberEnd ) {
		// strtod consumed nothing: report the character as bad_tok.
		tok.second = *numberEnd; // same character as ch
		return tok;
	}

	// strtod found a number (or at least a double).
	tok.first = tok_number;
	tok.second = value; // let C++ do any conversions
	// Continue right after the last character strtod consumed.
	currentColumnNumber = startColumn + static_cast<int>( numberEnd - numberBegin );
	return tok;
} // end gettok

// Selects where the lexer reads from.
//
//   filename -- "-" selects std::cin; anything else is opened as a file.
//               Must not be empty (asserted; the caller should check).
//
// inputSourceName is set to filename in either case. If the file cannot be
// opened, a message is written to std::cerr and the process exits with
// status -2.
//
// NOTE: currentLine and the line/column counters are not reset here; that is
// unchanged from the previous behaviour.
void openInputSource( std::string filename ) {
	assert( ! filename.empty() ); // caller should check that
	inputSourceName = filename;

	if ( "-" == filename ) {
		input = &(std::cin);
		return;
	}

	// The stream must outlive this call because `input` keeps pointing at it,
	// hence the static. It is deliberately default-constructed and opened
	// explicitly: an initializer on a function-local static runs only on the
	// first call, so constructing it from `filename` would silently keep
	// reading the first file on every later call.
	static std::ifstream ifs;
	if ( ifs.is_open() )
		ifs.close();
	ifs.clear(); // drop eof/fail state left over from a previous file
	ifs.open( filename, std::ifstream::in );

	if ( ! ifs.good( ) ) {
		std::cerr << "ERROR opening file name " << filename
		          << " -- could not open.\n";
		std::exit( -2 );
	}

	// else the file is open and readable
	input = &ifs;
} // end of openInputSource

} // end namespace lex