
(inputs by Riminch Ammu)
Lexical analyzer program in C++
on the Unix platform
Total 5 files:
1. globals.cpp
2. globals.h
3. lex.cpp
4. lex.h
5. lextest.cpp
1. globals.cpp
// globals.cpp
// Defines the program-wide token stream that "globals.h" declares.
#include "globals.h"
// The one shared TokenStream object; lextest.cpp reads its tokens from it
// via peek() and get().
TokenStream ctoken;
2. globals.h
// globals.h
// Declares the global token stream so that any file including this header
// can use it.  "lex.h" is included because it declares TokenStream.
#ifndef GLOBALS_H
#define GLOBALS_H
#include "lex.h"
// Declaration only; the object itself is defined in globals.cpp.
extern TokenStream ctoken;
#endif
3. lex.cpp
// lex.cpp
// Lexical Analyzer: For a description of the tokens, see "lex.h".
#include <assert.h>
#include <ctype.h>
#include <iostream.h>
#include <stdlib.h>
#include "lex.h"
// Token::kind()
// Report which TokenKind this token carries.  Valid for every token,
// unlike value() and op_char(), which assert a specific kind.
TokenKind Token::kind() const
{
    return this->the_kind;
}
// Token::value()
// Return the value of a NUMBER
token.
unsigned Token::value() const
{
assert (the_kind == NUMBER);
return the_value;
}
// Token::op_char()
// Return the character corresponding
to the OP token
char Token::op_char() const
{
assert (the_kind == OP);
return the_op_char;
}
// Token::print()
// Output the value of this token
followed by a new line.
void Token::print() const
{
switch (the_kind) {
case QUIT: cout << "QUIT";
break;
case END: cout << "END";
break;
case NUMBER: cout << "NUMBER:
" << the_value; break;
case OP: cout << "OP:
" << the_op_char; break;
case LPAREN: cout << "LPAREN";
break;
case RPAREN: cout << "RPAREN";
break;
}
cout << endl;
}
// TokenStream::get()
// Return the next input token
and remove it from the stream.
Token TokenStream::get()
{
Token tok; // return value
if (the_stack.empty()) {
tok = input();
}
else {
tok = the_stack.top();
the_stack.pop();
}
return tok;
}
// [TOP] -- stray navigation text, not part of lex.cpp
// TokenStream::peek()
// Return the next input token
but do not remove it from the
stream.
// The next call to peek() or
get() should return the same token
as this call.
Token TokenStream::peek()
{
if (the_stack.empty()) {
the_stack.push(input());
}
return the_stack.top();
}
// TokenStream::input()
// Read characters from standard input until one complete token has been
// recognized, and return that token:
//   'q'              -> QUIT
//   '='              -> END
//   '('  ')'         -> LPAREN, RPAREN
//   '+' '-' '*' '/'  -> OP     (the character is stored in the token)
//   a decimal digit  -> NUMBER (one digit only; its value is stored)
// Whitespace is skipped silently.  Any other character is skipped with a
// warning on cerr.  If end of file is reached before a token is found, an
// error is printed and the program exits with EXIT_FAILURE.
Token TokenStream::input()
{
    Token tok;

    while (true) { // loop until we can return a token
        int c = cin.peek();
        if (c == EOF) {
            cerr << "Unexpected end of file" << endl;
            exit(EXIT_FAILURE);
        }

        // Every remaining case consumes exactly the character just peeked.
        cin.get();

        if (c == 'q') {
            tok.the_kind = QUIT;
            return tok;
        }
        else if (c == '=') {
            tok.the_kind = END;
            return tok;
        }
        else if (c == '(') {
            tok.the_kind = LPAREN;
            return tok;
        }
        else if (c == ')') {
            tok.the_kind = RPAREN;
            return tok;
        }
        else if (c == '+' || c == '-' || c == '*' || c == '/') {
            tok.the_kind = OP;
            tok.the_op_char = (char) c;
            return tok;
        }
        else if (isdigit(c)) {
            // read a 1-digit number
            tok.the_kind = NUMBER;
            tok.the_value = c - '0';
            return tok;
        }
        else if (!isspace(c)) {
            // not a token and not whitespace: warn user; keep looping
            cerr << "WARNING: Unexpected character ignored: "
                 << (char) c << endl;
        }
        // whitespace: already skipped, keep looping
    }
}
4. lex.h
// lex.h
#ifndef LEX_H
#define LEX_H
#include <iostream.h>
#include <stack.h>
// TokenKind defines the legal tokens in the language
enum TokenKind {
    QUIT,           // the letter 'q'
    END,            // equals sign is end of expression
    NUMBER,         // a single digit
    OP,             // arithmetic operator
    LPAREN, RPAREN  // parentheses
};
class Token {
friend class TokenStream;
public:
TokenKind kind() const;
unsigned value() const; // return
the value of a NUMBER token
char op_char() const; // return
the character of an OP token
void print() const; // output
the kind (and other relevant info)
private:
TokenKind the_kind;
unsigned the_value;
char the_op_char;
};
class TokenStream {
public:
Token peek(); // Return the value
of the next input token without
removing
// it from the stream.
Token get(); // Return the next
input token; remove it from the
stream.
private:
Token input(); // Return the next
token from standard input
Token input_string (); // Input
an ID or a keyword token.
Token input_number (); // Input
a NUMBER token.
stack<Token> the_stack;
// Used by get and peek to save
tokens.
};
#endif
5. lextest.cpp
// lextest.cpp
#include <assert.h>
#include <iostream.h>
#include <stdlib.h>
#include "globals.h"
#include "lex.h"
// Test the lexical analyzer
by reading a sequence of tokens
from the
// input and outputting the value
of each token, one per line
// until the QUIT token is returned.
int main ()
{
Token tok1, tok2;
cout << "\nTesting
lexical analyzer...\n" <<
endl;
do {
tok1 = ctoken.peek();
tok2 = ctoken.get();
assert (tok1.kind() == tok2.kind());
tok2.print();
} while (tok2.kind() != QUIT);
return 0;
} |