/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Copyright (C) 1998,99 Gerwin Klein <kleing@informatik.tu-muenchen.de>. *
* All rights reserved. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License. See the file *
* COPYRIGHT for more information. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License along *
* with this program; if not, write to the Free Software Foundation, Inc., *
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* Java 1.2 language lexer specification */
/* Note, that this lexer specification is not tuned for speed.
It is in fact quite slow on integer and floating point literals,
because the input is read twice and the methods used to parse
the numbers are not very fast.
For a real world application (e.g. a Java compiler) this can
and should be optimized */
package mj.compiler.parser;
import java_cup.runtime.Symbol;
%%
/* Generated scanner: a public class named Scanner that reads Unicode input. */
%public
%class Scanner
%unicode
/* CUP integration: token kinds come from the CUP-generated class Sym.
   %cupsym is declared ahead of %cup, as the JFlex manual requires. */
%cupsym Sym
%cup
/* Maintain yyline and yycolumn (JFlex counts both from 0) so tokens and
   error messages can carry a source position. */
%line
%column
%{
/**
* OK, listen up, there's a wrapper thing going on here.
*
* JavaCUP insists that the 'tokens' we return are java_cup.runtime.Symbol.
* And that isn't an interface, it's a class. And it doesn't want to give
* us the instances, it wants to give us the fields of the instances. So
* we can't really do what we want with that class, which is to add line
* and column info to it. Well, we could add it, but we'd have to call it
* left and right, and that doesn't seem very easy to understand.
*
* So, we have our OWN CLASS OF TOKEN, called Token.
* And we have JAVA CUP's CLASS OF TOKEN, called Symbol.
*
* And we wrap instances of Token inside instances of Symbol. Our Tokens
* sit in the value field of Symbols. So Symbol is doing two things:
* (i) getting the token kind to JavaCUP, and (ii) getting our Tokens to
* JavaCUP, so that we can get them back.
*
* We read everything we need out of Tokens (not symbols). Most of that
* happens in the AST constructors.
*/
private Symbol makeSymbol(int kind) {
    // Build our own Token first, then hand it to CUP as the Symbol's payload.
    final Token payload = makeToken(kind);
    return new Symbol(kind, payload);
}
/**
 * Builds the Token for the text that was just matched.
 *
 * The scanner's yyline and yycolumn are passed through unchanged (JFlex
 * counts both from 0; the error messages in this file add 1 for display),
 * together with the matched text from yytext().
 *
 * See the documentation at makeSymbol for why Tokens are wrapped in Symbols.
 */
private Token makeToken(int kind) {
    final int line = yyline;
    final int column = yycolumn;
    final String lexeme = yytext();
    return new Token(kind, line, column, lexeme);
}
%}
/* main character classes */
/* A line break: carriage return, line feed, or the two-character CR LF pair. */
LineTerminator = \r|\n|\r\n
/* Any single character that is not part of a line break. */
InputCharacter = [^\r\n]
/* Skippable layout: a line break, space, tab or form feed. */
WhiteSpace = {LineTerminator} | [ \t\f]
/* comments */
/* Any well-formed comment. The rules section discards these. */
Comment = {TraditionalComment} | {EndOfLineComment} | {DocumentationComment}
/* Block comment that is not a documentation comment. The second alternative
   accepts a body made only of stars, including the empty comment (slash,
   star, star, slash); the first alternative cannot match those because it
   requires a non-star character right after the opener. */
TraditionalComment = "/*" [^*] {CommentContent} \*+ "/" | "/*" \*+ "/"
/* Same shape as a block comment but without requiring the closing
   star-slash. A terminated comment is matched equally long by Comment,
   whose rule comes first, so this macro only wins when the closer is
   missing. */
UnterminatedComment = "/*" {CommentContent} \** "/"?
/* Line comment together with the line terminator that ends it. */
EndOfLineComment = "//" {InputCharacter}* {LineTerminator}
/* Line comment without its terminator. EndOfLineComment gives a longer
   match whenever a terminator follows, so this only wins when the comment
   runs to end of input. */
UnterminatedEndOfLineComment = "//" {InputCharacter}*
/* Documentation comment: the opener, any further stars, then a character
   that is neither a star nor a slash. Excluding the slash there keeps an
   empty comment from being glued to a later closing star-slash and
   swallowing all the code in between. */
DocumentationComment = "/**" \** [^*/] {CommentContent} \*+ "/"
/* Comment body: non-star characters, or runs of stars followed by
   something other than a star or slash, so the body never contains the
   closing star-slash pair. */
CommentContent = ( [^*] | \*+[^*/] )*
/* identifiers */
/* JFlex predefined classes: jletter corresponds to
   Character.isJavaIdentifierStart and jletterdigit to
   Character.isJavaIdentifierPart. */
Identifier = [:jletter:][:jletterdigit:]*
/* integer literals */
/* Decimal only: a lone 0, or a non-zero digit followed by more digits.
   There is no sign, radix prefix or suffix, and a leading zero is matched
   on its own (007 yields three separate literals). */
DecIntegerLiteral = 0 | [1-9][0-9]*
%%
/* Reserved words, in alphabetical order. They appear before the Identifier
   rule, so on an equal-length match the reserved word wins. */
"boolean"  { return makeSymbol(Sym.BOOLEAN); }
"break"    { return makeSymbol(Sym.BREAK); }
"class"    { return makeSymbol(Sym.CLASS); }
"continue" { return makeSymbol(Sym.CONTINUE); }
"else"     { return makeSymbol(Sym.ELSE); }
"for"      { return makeSymbol(Sym.FOR); }
"if"       { return makeSymbol(Sym.IF); }
"int"      { return makeSymbol(Sym.INT); }
"new"      { return makeSymbol(Sym.NEW); }
"return"   { return makeSymbol(Sym.RETURN); }
"this"     { return makeSymbol(Sym.THIS); }
"void"     { return makeSymbol(Sym.VOID); }
"while"    { return makeSymbol(Sym.WHILE); }
/* Boolean literals share one token kind; the matched text carried by the
   Token tells them apart. */
"true"     |
"false"    { return makeSymbol(Sym.BOOLEAN_LITERAL); }
/* The null literal. */
"null"     { return makeSymbol(Sym.NULL_LITERAL); }
/* Separators. */
"("  { return makeSymbol(Sym.LPAREN); }
")"  { return makeSymbol(Sym.RPAREN); }
"{"  { return makeSymbol(Sym.LBRACE); }
"}"  { return makeSymbol(Sym.RBRACE); }
";"  { return makeSymbol(Sym.SEMICOLON); }
","  { return makeSymbol(Sym.COMMA); }
"."  { return makeSymbol(Sym.DOT); }
/* Operators, grouped by purpose. The scanner always takes the longest
   match, so a two-character operator beats its one-character prefix
   regardless of the order of these rules. */
/* assignment and comparison */
"="  { return makeSymbol(Sym.EQ); }
"==" { return makeSymbol(Sym.EQEQ); }
"!=" { return makeSymbol(Sym.NOTEQ); }
"<"  { return makeSymbol(Sym.LT); }
"<=" { return makeSymbol(Sym.LTEQ); }
">"  { return makeSymbol(Sym.GT); }
">=" { return makeSymbol(Sym.GTEQ); }
/* logical */
"!"  { return makeSymbol(Sym.NOT); }
"&&" { return makeSymbol(Sym.ANDAND); }
"||" { return makeSymbol(Sym.OROR); }
/* increment and decrement */
"++" { return makeSymbol(Sym.PLUSPLUS); }
"--" { return makeSymbol(Sym.MINUSMINUS); }
/* arithmetic */
"+"  { return makeSymbol(Sym.PLUS); }
"-"  { return makeSymbol(Sym.MINUS); }
"*"  { return makeSymbol(Sym.MULT); }
"/"  { return makeSymbol(Sym.DIV); }
"%"  { return makeSymbol(Sym.MOD); }
/* bitwise */
"~"  { return makeSymbol(Sym.COMP); }
"&"  { return makeSymbol(Sym.AND); }
"|"  { return makeSymbol(Sym.OR); }
"^"  { return makeSymbol(Sym.XOR); }
/* Integer literals. */
{DecIntegerLiteral}            { return makeSymbol(Sym.INTEGER_LITERAL); }
/* Well-formed comments and whitespace are consumed without producing a token. */
{Comment}                      |
{WhiteSpace}                   { }
/* A comment that is cut off by end of input is a hard error. Both kinds
   report the position where the comment began, converted to 1-based. */
{UnterminatedComment}          |
{UnterminatedEndOfLineComment} { throw new RuntimeException("Unterminated comment at EOF.\nComment started at line "+(yyline+1)+", column "+(yycolumn+1)); }
/* Identifiers: anything name-shaped that no earlier reserved-word rule claimed. */
{Identifier}                   { return makeSymbol(Sym.IDENTIFIER); }
/* Error fallback: a character that no earlier rule accepted is a lexical
   error, reported with its 1-based line and column. */
.|\n { throw new RuntimeException("Illegal character \""+yytext()+"\" at line "+(yyline+1)+", column "+(yycolumn+1)); }