Compilerbau: Mini-Scanner mit JLex |
Ein Mini-Scanner für die Symbole IF, ID, NUM und REAL sowie für Zwischenräume und illegale Zeichen, in zwei Varianten.
|
import java.io.IOException;
/**
 * Command-line driver for the first scanner variant: tokenizes standard
 * input and prints one line per delivered symbol.
 */
public class Mini0 {

    /**
     * Pulls symbols from a Scanner0 reading System.in until the scanner
     * hands back null, printing each symbol on its own line.
     *
     * @param argv command-line arguments (not used)
     * @throws java.io.IOException if the scanner fails while reading input
     */
    public static void main(String[] argv) throws java.io.IOException {
        Scanner0 scanner = new Scanner0(System.in);
        for (Token0 symbol = scanner.nextSymbol();
             symbol != null;
             symbol = scanner.nextSymbol()) {
            System.out.println(symbol);
        }
    }
}
/**
 * A symbol produced by the scanner: an integer symbol code together with
 * the matched input text.
 */
class Token0 {

    /** Symbol code for the keyword if. */
    public static final int IF = 1;
    /** Symbol code for identifiers. */
    public static final int ID = 2;
    /** Symbol code for integer numbers. */
    public static final int NUM = 3;
    /** Symbol code for real numbers. */
    public static final int REAL = 4;
    /** Symbol code for separators. */
    public static final int SEP = 5;
    /** Symbol code for illegal characters. */
    public static final int ERR = 6;

    /** One of the symbol codes declared above. */
    public int token;
    /** The matched input text. */
    public String text;

    /**
     * Creates a symbol.
     *
     * @param token the symbol code
     * @param text  the matched input text
     */
    Token0(int token, String text) {
        this.text = text;
        this.token = token;
    }

    /**
     * Renders the symbol as the code and the quoted text, separated by a tab.
     *
     * @return the printable form of this symbol
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("token: ");
        out.append(token);
        out.append("\ttext: \"");
        out.append(text);
        out.append("\"");
        return out.toString();
    }
}
%%
%class Scanner0
%type Token0
%function nextSymbol
%full
%line
%char
%{
/**
 * Builds the symbol for the lexeme just matched: the given symbol code
 * paired with the current match text as returned by yytext().
 *
 * @param token one of the Token0 symbol codes
 * @return a new Token0 holding the code and the matched text
 */
private Token0 mkToken(int token) {
return
new Token0(token, yytext());
}
%}
digit=[0-9]
digits={digit}+
digits0={digit}*
whitespace=[ \t\n]+
%%
if { return // keyword if; listed ahead of the identifier rule so the keyword is reported as IF rather than ID
mkToken(Token0.IF);
}
[a-zA-Z][a-zA-Z0-9]* { return // identifier: a letter followed by any number of letters and digits
mkToken(Token0.ID);
}
{digits} { return // integer: one or more decimal digits
mkToken(Token0.NUM);
}
({digits}"."{digits0})|({digits0}"."{digits})
{ return // real: a dot with digits on at least one side, e.g. 123.456, 123. and .456
mkToken(Token0.REAL);
}
("--".*[\n])|{whitespace}
{
// Separators: a double-dash comment up to and including its newline,
// or a run of blanks, tabs and newlines. Nothing is returned here, so
// separators are dropped and never show up as symbols.
// NOTE(review): the comment alternative requires a closing newline; a
// comment on a last line without newline presumably falls through to
// the other rules - confirm whether that is intended.
}
. { return // fallback: a single character that no earlier rule accepts is reported as ERR
mkToken(Token0.ERR);
}
|
some test input
for Mini scanner
a comment -- comment
a number 123
some reals 123.456 123. .456
an if symbol
some illegal symbols . - +
|
token: 2 text: "some"
token: 2 text: "test"
token: 2 text: "input"
token: 2 text: "for"
token: 2 text: "Mini"
token: 2 text: "scanner"
token: 2 text: "a"
token: 2 text: "comment"
token: 2 text: "a"
token: 2 text: "number"
token: 3 text: "123"
token: 2 text: "some"
token: 2 text: "reals"
token: 4 text: "123.456"
token: 4 text: "123."
token: 4 text: ".456"
token: 2 text: "an"
token: 1 text: "if"
token: 2 text: "symbol"
token: 2 text: "some"
token: 2 text: "illegal"
token: 2 text: "symbols"
token: 6 text: "."
token: 6 text: "-"
token: 6 text: "+"
|
Die zweite Variante erkennt die gleichen Symbole. Die Symbolklasse ist so erweitert worden, dass die Zeilennummer und die Position im Eingabestrom für Fehlermeldungen mitgespeichert werden. |
import java.io.IOException;
/**
 * Command-line driver for the second scanner variant: tokenizes standard
 * input and prints one line per delivered symbol, separators included.
 */
public class Mini {

    /**
     * Pulls symbols from a Scanner reading System.in until the scanner
     * hands back null, printing each symbol on its own line.
     *
     * @param argv command-line arguments (not used)
     * @throws java.io.IOException if the scanner fails while reading input
     */
    public static void main(String[] argv) throws java.io.IOException {
        Scanner scanner = new Scanner(System.in);
        for (Token symbol = scanner.nextSymbol();
             symbol != null;
             symbol = scanner.nextSymbol()) {
            System.out.println(symbol);
        }
    }
}
/**
 * A symbol produced by the scanner: a symbolic name, the matched text,
 * and the position of the match in the input (line and character range)
 * for use in error messages.
 */
class Token {

    /** Symbol name for the keyword if. */
    public static final String IF = "IF";
    /** Symbol name for identifiers. */
    public static final String ID = "ID";
    /** Symbol name for integer numbers. */
    public static final String NUM = "NUM";
    /** Symbol name for real numbers. */
    public static final String REAL = "REAL";
    /** Symbol name for separators. */
    public static final String SEP = "SEP";
    /** Symbol name for illegal characters. */
    public static final String ERR = "ERR";

    /** One of the symbol names declared above. */
    public String token;
    /** The matched input text. */
    public String text;
    /** Line on which the match was found. */
    public int line;
    /** Character position at which the match begins. */
    public int charBegin;
    /** Character position at which the match ends. */
    public int charEnd;

    /**
     * Creates a symbol with position information.
     *
     * @param token     the symbol name
     * @param text      the matched input text
     * @param line      line of the match
     * @param charBegin character position where the match begins
     * @param charEnd   character position where the match ends
     */
    Token(String token, String text, int line, int charBegin, int charEnd) {
        this.charEnd = charEnd;
        this.charBegin = charBegin;
        this.line = line;
        this.text = text;
        this.token = token;
    }

    /**
     * Renders name, line, character range and quoted text, separated by tabs.
     *
     * @return the printable form of this symbol
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("token: ");
        out.append(token);
        out.append("\tline: ").append(line);
        out.append("\trange: ").append(charBegin).append("..").append(charEnd);
        out.append("\ttext: \"").append(text).append("\"");
        return out.toString();
    }
}
%%
%class Scanner
%type Token
%function nextSymbol
%full
%line
%char
%{
/**
 * Builds the symbol for the lexeme just matched. Besides the symbol name
 * and the match text from yytext(), it records yyline as the line and
 * yychar as the begin position; the end position is yychar plus the
 * length of the match text, i.e. one past the last matched character.
 *
 * @param token one of the Token symbol names
 * @return a new Token holding name, text and position of the match
 */
private Token mkToken(String token) {
return
new Token(token,
yytext(),
yyline,
yychar,
yychar + yytext().length());
}
%}
digit=[0-9]
digits={digit}+
digits0={digit}*
whitespace=[ \t\n]+
%%
if { return // keyword if; listed ahead of the identifier rule so the keyword is reported as IF rather than ID
mkToken(Token.IF);
}
[a-zA-Z][a-zA-Z0-9]* { return // identifier: a letter followed by any number of letters and digits
mkToken(Token.ID);
}
{digits} { return // integer: one or more decimal digits
mkToken(Token.NUM);
}
({digits}"."{digits0})|({digits0}"."{digits})
{ return // real: a dot with digits on at least one side, e.g. 123.456, 123. and .456
mkToken(Token.REAL);
}
("--".*[\n])|{whitespace}
{ return // separator: a double-dash comment including its closing newline, or a run of blanks, tabs and newlines; returned as SEP in this variant
// NOTE(review): the comment alternative requires a closing newline; a
// comment on a last line without newline presumably falls through to
// the other rules - confirm whether that is intended.
mkToken(Token.SEP);
}
. { return // fallback: a single character that no earlier rule accepts is reported as ERR
mkToken(Token.ERR);
}
|
Die gleiche Eingabe wie oben:
some test input
for Mini scanner
a comment -- comment
a number 123
some reals 123.456 123. .456
an if symbol
some illegal symbols . - +
|
token: ID line: 0 range: 0..4 text: "some"
token: SEP line: 0 range: 4..5 text: " "
token: ID line: 0 range: 5..9 text: "test"
token: SEP line: 0 range: 9..10 text: " "
token: ID line: 0 range: 10..15 text: "input"
token: SEP line: 0 range: 15..16 text: "
"
token: ID line: 1 range: 16..19 text: "for"
token: SEP line: 1 range: 19..20 text: " "
token: ID line: 1 range: 20..24 text: "Mini"
token: SEP line: 1 range: 24..25 text: " "
token: ID line: 1 range: 25..32 text: "scanner"
token: SEP line: 1 range: 32..33 text: "
"
token: ID line: 2 range: 33..34 text: "a"
token: SEP line: 2 range: 34..35 text: " "
token: ID line: 2 range: 35..42 text: "comment"
token: SEP line: 2 range: 42..43 text: " "
token: SEP line: 2 range: 43..54 text: "-- comment
"
token: ID line: 3 range: 54..55 text: "a"
token: SEP line: 3 range: 55..56 text: " "
token: ID line: 3 range: 56..62 text: "number"
token: SEP line: 3 range: 62..63 text: " "
token: NUM line: 3 range: 63..66 text: "123"
token: SEP line: 3 range: 66..67 text: "
"
token: ID line: 4 range: 67..71 text: "some"
token: SEP line: 4 range: 71..72 text: " "
token: ID line: 4 range: 72..77 text: "reals"
token: SEP line: 4 range: 77..78 text: " "
token: REAL line: 4 range: 78..85 text: "123.456"
token: SEP line: 4 range: 85..86 text: " "
token: REAL line: 4 range: 86..90 text: "123."
token: SEP line: 4 range: 90..91 text: " "
token: REAL line: 4 range: 91..95 text: ".456"
token: SEP line: 4 range: 95..96 text: "
"
token: ID line: 5 range: 96..98 text: "an"
token: SEP line: 5 range: 98..99 text: " "
token: IF line: 5 range: 99..101 text: "if"
token: SEP line: 5 range: 101..102 text: " "
token: ID line: 5 range: 102..108 text: "symbol"
token: SEP line: 5 range: 108..109 text: "
"
token: ID line: 6 range: 109..113 text: "some"
token: SEP line: 6 range: 113..114 text: " "
token: ID line: 6 range: 114..121 text: "illegal"
token: SEP line: 6 range: 121..122 text: " "
token: ID line: 6 range: 122..129 text: "symbols"
token: SEP line: 6 range: 129..130 text: " "
token: ERR line: 6 range: 130..131 text: "."
token: SEP line: 6 range: 131..132 text: " "
token: ERR line: 6 range: 132..133 text: "-"
token: SEP line: 6 range: 133..134 text: " "
token: ERR line: 6 range: 134..135 text: "+"
token: SEP line: 6 range: 135..136 text: "
"
|
Letzte Änderung: 29.11.2012 | © Prof. Dr. Uwe Schmidt |