http://www.cnblogs.com/xuqiang/archive/2010/09/21/1953501.html
Main.java
/** 主程序*/
import java.io.*;
import lexer.*;

/** Program entry point: runs the lexer over its input and saves the results. */
public class Main {

    /**
     * Keeps scanning until the lexer reports that its reader hit end of input,
     * then writes out the token list and the symbol table.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if scanning or writing the result files fails
     */
    public static void main(String[] args) throws IOException {
        final Lexer scanner = new Lexer();

        boolean finished = scanner.getReaderState();
        while (!finished) {
            scanner.scan();
            finished = scanner.getReaderState();
        }

        /* Save the collected information. */
        scanner.saveTokens();
        scanner.saveSymbolsTable();
    }
}
Lexer.java
package lexer;import java.io.*;
import java.util.*;import symbols.*;public class Lexer {public static int line = 1; /* 記錄行號 */char peek = ' '; /* 下一個讀入字符 */Hashtable<String, Word> words = new Hashtable<String, Word>();/* 符號表 */private Hashtable<Token, String> table = new Hashtable<Token, String>();/* token序列 */private List<String> tokens = new LinkedList<String> ();/* 讀取文件變量 */BufferedReader reader = null; /* 保存當(dāng)前是否讀取到了文件的結(jié)尾 */private Boolean isEnd = false;/* 是否讀取到文件的結(jié)尾 */public Boolean getReaderState() {return this.isEnd;}/* 保存存儲在table中的 */public void saveSymbolsTable() throws IOException {FileWriter writer = new FileWriter("符號表.txt");writer.write("[符號] [符號類型信息]\n");writer.write("\r\n");Enumeration<Token> e = table.keys();while( e.hasMoreElements() ){Token token = (Token)e.nextElement();String desc = table.get(token);/* 寫入文件 */writer.write(token + "\t\t\t" + desc + "\r\n");}writer.flush();}/* 保存Tokens */public void saveTokens() throws IOException {FileWriter writer = new FileWriter("Tokens表.txt");writer.write("[符號] \n");writer.write("\r\n");for(int i = 0; i < tokens.size(); ++i) {String tok = (String)tokens.get(i);/* 寫入文件 */writer.write(tok + "\r\n");} writer.flush();}void reserve(Word w) {words.put(w.lexme, w);}/** 構(gòu)造函數(shù)中將關(guān)鍵字和類型添加到hashtable words中*/public Lexer() {/* 初始化讀取文件變量 */try {reader = new BufferedReader(new FileReader("輸入.txt"));}catch(IOException e) {System.out.print(e);}/* 關(guān)鍵字 */this.reserve(new Word("if", Tag.IF));this.reserve(new Word("then", Tag.THEN));this.reserve(new Word("else", Tag.ELSE));this.reserve(new Word("while", Tag.WHILE));this.reserve(new Word("do", Tag.DO));/* 類型 */this.reserve(Word.True);this.reserve(Word.False);this.reserve(Type.Int);this.reserve(Type.Char);this.reserve(Type.Bool);this.reserve(Type.Float);}public void readch() throws IOException {/* 這里應(yīng)該是使用的是 */peek = (char)reader.read();if((int)peek == 0xffff){this.isEnd = true;}// peek = (char)System.in.read();}public Boolean readch(char ch) throws IOException 
{readch();if (this.peek != ch) {return false;}this.peek = ' ';return true;}public Token scan() throws IOException {/* 消除空白 */ for( ; ; readch() ) {if(peek == ' ' || peek == '\t')continue;else if (peek == '\n') line = line + 1;elsebreak;}/* 下面開始分割關(guān)鍵字,標(biāo)識符等信息 */switch (peek) {/* 對于 ==, >=, <=, !=的區(qū)分使用狀態(tài)機實現(xiàn) */case '=' :if (readch('=')) {tokens.add("==");return Word.eq; }else {tokens.add("=");return new Token('=');}case '>' :if (readch('=')) {tokens.add(">=");return Word.ge;}else {tokens.add(">");return new Token('>');}case '<' :if (readch('=')) {tokens.add("<=");return Word.le;}else {tokens.add("<");return new Token('<');}case '!' :if (readch('=')) {tokens.add("!=");return Word.ne;}else {tokens.add("!");return new Token('!');} }/* 下面是對數(shù)字的識別,根據(jù)文法的規(guī)定的話,這里的* 數(shù)字只要是能夠識別整數(shù)就行.*/if(Character.isDigit(peek)) {int value = 0;do {value = 10 * value + Character.digit(peek, 10);readch();} while (Character.isDigit(peek));Num n = new Num(value);tokens.add(n.toString());//table.put(n, "Num");return n;}/** 關(guān)鍵字或者是標(biāo)識符的識別*/if(Character.isLetter(peek)) {StringBuffer sb = new StringBuffer();/* 首先得到整個的一個分割 */do {sb.append(peek);readch();} while (Character.isLetterOrDigit(peek));/* 判斷是關(guān)鍵字還是標(biāo)識符 */String s = sb.toString();Word w = (Word)words.get(s);/* 如果是關(guān)鍵字或者是類型的話,w不應(yīng)該是空的 */if(w != null) {// table.put(w, "KeyWord or Type");tokens.add(w.toString());return w; /* 說明是關(guān)鍵字 或者是類型名 */}/* 否則就是一個標(biāo)識符id */w = new Word(s, Tag.ID);tokens.add(w.toString());table.put(w, "id");words.put(s, w);return w;}/* peek中的任意字符都被認(rèn)為是詞法單元返回 */Token tok = new Token(peek);// table.put(tok, "Token or Seprator");if ((int)peek != 0xffff ) tokens.add(tok.toString());peek = ' ';return tok;}
}
Num.java
package lexer;public class Num extends Token{public final int value;public Num(int v) {super(Tag.NUM);this.value = v;}public String toString() {return "" + value;}
}
Tag.java
package lexer;

/**
 * Integer tags identifying the kinds of multi-character tokens.
 *
 * <p>Values start at 256. Single-character tokens are built elsewhere with
 * their character code as the tag, so presumably this range was chosen to stay
 * clear of those codes.
 */
public class Tag {
    public final static int
            AND = 256,
            BASIC = 257,
            BREAK = 258,
            DO = 259,
            ELSE = 260,
            EQ = 261, /* == */
            FALSE = 262,
            GE = 263,
            ID = 264,
            IF = 265,
            INDEX = 266,
            LE = 267,
            MINUS = 268,
            NE = 269,
            NUM = 270,
            OR = 271,
            REAL = 272,
            TEMP = 273,
            TRUE = 274,
            WHILE = 275,
            /* added later */
            THEN = 276;
}
Token.java
package lexer;public class Token {public final int tag;public Token(int t) {this.tag = t;}public String toString() {return "" + (char)tag;}public static void main(String[] args) {Token tok = new Token('a');System.out.println(tok);}
}
Word.java
/** 類word用于管理保留字,標(biāo)識符以及像&&這樣的復(fù)合單詞元素 。*/
package lexer;public class Word extends Token {public String lexme = "";public Word (String s, int t) {super(t);this.lexme = s;}public String toString() {return this.lexme;}public static final Word and = new Word("&&", Tag.AND),or = new Word("||", Tag.OR),eq = new Word ("==", Tag.EQ),ne = new Word("!=", Tag.NE),le = new Word("<=", Tag.LE),ge = new Word(">=", Tag.GE),minus = new Word("minus", Tag.MINUS),True = new Word("true", Tag.TRUE),False = new Word("false", Tag.FALSE),temp = new Word("t", Tag.TEMP);
}
Type.java
/** 說明數(shù)據(jù)類型*/
package symbols;import lexer.*;public class Type extends Word{public Type(String s, int tag) {super(s, tag);}public static final TypeInt = new Type("int", Tag.BASIC),Float = new Type("float", Tag.BASIC),Char = new Type ("char", Tag.BASIC),Bool = new Type("bool", Tag.BASIC);}
?
============
http://freewxy.iteye.com/blog/870016
什么是詞法? ?
?
所謂詞法,源代碼由字符流組成,字符流中包括關鍵字,變量名,方法名,括號等等符號,其中變量名要滿足不能包括標點符號,不能以數字開頭的數字與字母的字符串這個條件,對于括號要成對出現等等,這就是詞法;
?
什么是詞法分析?
?
詞法分析階段是編譯過程的第一個階段。這個階段的任務是從左到右一個字符一個字符地讀入源程序,即對構成源程序的字符流進行掃描,然后根據構詞規則識別單詞(也稱單詞符號或符號)。
?
待分析的簡單語言的詞法:
?
1) 關鍵字
   begin if then while do end
2) 運算符和界符
   := + - * / < <= > >= <> = ; ( ) #
3) 其他單詞是標識符(ID)和整型常數(NUM),通過以下正規式定義:
   ID = letter (letter | digit)*
   NUM = digit digit*
4) 空格由空白、制表符和換行符組成。空格一般用來分隔ID、NUM、運算符、界符和關鍵字,詞法分析階段通常被忽略。
?
?
各種單詞符號對應的種別編碼

單詞符號                  種別碼    單詞符號    種別碼
begin                     1         :           17
if                        2         :=          18
then                      3         <           20
while                     4         <>          21
do                        5         <=          22
end                       6         >           23
letter(letter|digit)*     10        >=          24
digit digit*              11        =           25
+                         13        ;           26
-                         14        (           27
*                         15        )           28
/                         16        #           0
?
?
詞法分析程序的功能:
?
輸入:所給文法的源程序字符串
輸出:二元組(syn, token 或 sum)構成的序列。
syn 為單詞種別碼;
token 為存放的單詞自身字符串;
sum 為整型常數。
例如:對源程序 begin x:=9; if x>0 then x:=2*x+1/3; end # 經詞法分析后輸出如下序列:(1,begin) (10,x) (18,:=) (11,9) (26,;) (2,if) ……
?
?
?
流 程圖:
?源碼:
?
Java代碼 ?
public ?class ?詞法分析?{ ???? ?? ????? ? ? ? ?? ????static ?String?prog; ?? ????static ?char ?ch; ?? ????static ?char []token=new ?char [8 ]; ?? ????static ?int ?syn,p,m,n,sum; ?? ????static ??? ????String[]?rwtable={"begin" ,"if" ,"then" ,"while" ,"do" ,"end" }; ?? ???? ?? ????? ? ? ?? ????public ?static ?void ?main(String[]?args)?throws ?IOException?{ ?? ??????????? ??????????? ??????????? ????????prog=dofile.readFileByChars("src/data.txt" ); ?? ??????????? ?????????p=0 ; ?? ?????????do { ?? ?????????????scaner(); ?? ?????????????switch (syn){ ?? ?????????????case ?11 :System.out.print("(" +syn+"?,?" );?? ?????????????????????System.out.print(sum); ?? ?????????????????????System.out.println(")" ); ?? ????????????????break ; ?? ?????????????case ?-1 :System.out.println("error!" ); ?? ????????????????break ; ?? ?????????????default : ?? ?????????????????????System.out.print("(" ); ?? ?????????????????????System.out.print(syn);? ?? ?????????????????????System.out.print("?,?" ); ?? ?????????????????????String?str=new ?String(token); ?? ?????????????????????System.out.print(str); ?? ?????????????????????System.out.println(")" ); ?? ?????????????} ?? ?????????}while (syn!=0 ); ?? ????????? ?? ????????? ?? ????} ?? ?????? ????private ?static ?void ?scaner()?throws ?IOException?{??????? ?? ?? ????????for (int ?i=0 ;i<8 ;i++) ?? ????????????token[i]='?' ; ?? ?? ????????ch=prog.charAt(p++); ?? ????????while (ch=='?' ){?? ????????????ch=prog.charAt(p++); ?? ????????} ?? ?? ?? ?? ?? ?? ????????if (ch>='a' &&ch<='z' ){ ?? ????????????m=0 ; ?? ?????????????? ????????????while ((ch>='a' &&ch<='z' )||(ch>='0' &&ch<='9' )){ ?? ????????????????token[m++]=ch; ?? ????????????????ch=prog.charAt(p++); ?? ????????????} ?? ????????????token[m++]='\0' ; ?? ????????????--p; ?? ????????????syn=10 ;?? ?????????????? ????????????String?newStr=new ?String(token); ?? ????????????newStr=newStr.trim(); ?? ?????????????? ????????????for (n=0 ;n<6 ;n++){ ?? ?????????????????? 
????????????????if (newStr.equals(rwtable[n])){ ?? ????????????????????syn=n+1 ; ?? ????????????????????System.out.println("syn?的值是:" +syn); ?? ????????????????????break ; ?? ????????????????} ?? ????????????} ?? ????????????token[m++]='\0' ; ?? ????????} ?? ?? ?? ????????else ?if (ch>='0' &&ch<='9' ){ ?? ????????????while (ch>='0' &&ch<='9' ){ ?? ????????????????sum=sum*10 +ch-'0' ; ?? ????????????????ch=prog.charAt(p++); ?? ????????????} ?? ????????????--p; ?? ????????????syn=11 ;?? ????????????token[m++]='\0' ; ?? ????????} ?? ?? ?? ?? ?? ?? ?? ????????else ? ?? ????????switch (ch){ ?? ????????????case '<' : ?? ????????????????m=0 ; ?? ????????????????token[m++]=ch; ?? ????????????????ch=prog.charAt(p++); ?? ????????????????if (ch=='>' ){ ?? ????????????????????syn=21 ;?? ????????????????} ?? ????????????????else ?if (ch=='=' ){ ?? ????????????????????syn=22 ;?? ????????????????????token[m++]=ch; ?? ????????????????} ?? ????????????????else { ?? ????????????????????syn=20 ;?? ????????????????????--p; ?? ????????????????} ?? ????????????break ; ?? ????????????case '>' : ?? ????????????????token[m++]=ch; ?? ????????????????ch=prog.charAt(p++); ?? ????????????????if (ch=='=' ){ ?? ????????????????????syn=24 ;?? ????????????????} ?? ????????????????else { ?? ????????????????????syn=23 ;?? ????????????????????--p; ?? ????????????????} ?? ????????????????break ; ?? ????????????case ':' : ?? ????????????????token[m++]=ch; ?? ????????????????ch=prog.charAt(p++); ?? ????????????????if (ch=='=' ){ ?? ????????????????????syn=18 ;?? ????????????????????token[m++]=ch; ?? ????????????????} ?? ????????????????else { ?? ????????????????????syn=17 ;?? ????????????????????--p; ?? ????????????????} ?? ????????????break ; ?? ????????????case '+' : ?? ????????????????syn=13 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????case '-' : ?? ????????????????syn=14 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????case '*' : ?? 
????????????????syn=15 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????case '/' : ?? ????????????????syn=16 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????case '=' : ?? ????????????????syn=25 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????case ';' : ?? ????????????????syn=26 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????case '(' : ?? ????????????????syn=27 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????case ')' : ?? ????????????????syn=28 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????case '#' : ?? ????????????????syn=0 ;token[0 ]=ch;token[1 ]='\0' ;break ; ?? ????????????default : ?? ????????????????syn=-1 ;? ?? ???????????????? ?? ????????} ?? ???????? ?? ????????File?txt=new ?File("src/nihao.txt" ); ?? ???????????if (!txt.exists()){ ?? ???????????????txt.createNewFile(); ?? ???????????} ?? ???????????byte []?bytes=new ?byte [token.length];?? ???????????for (int ?i=0 ;i<bytes.length?;i++){?? ???????????????byte ?b=(byte )token[i];?? ???????????????bytes[i]=b; ?? ???????????} ?? ???????????FileOutputStream?fos; ?? ????????try ?{ ?? ????????????fos?=?new ?FileOutputStream(txt,true ); ?? ????????????fos.write(syn); ?? ????????????fos.write(bytes); ?? ???????????? ?? ???????????????fos.close(); ?? ????????}?catch ?(Exception?e)?{ ?? ????????????e.printStackTrace(); ?? ????????}?? ?? ????} ?? }??
public class 詞法分析 {/* 初始化數(shù)據(jù)syn為單詞種別碼;token為存放的單詞自身字符串;sum為整型常數(shù)。*/static String prog;static char ch;static char[]token=new char[8];static int syn,p,m,n,sum;static //關(guān)鍵字表的初值String[] rwtable={"begin","if","then","while","do","end"};/*** @param args* @throws IOException */public static void main(String[] args) throws IOException {//1、輸入字符串//prog="begin x:=9; if x>0 then x:=2*x+1/3;end #";//1、從文件中讀取字符串prog=dofile.readFileByChars("src/data.txt");//2、掃描輸出p=0;do{scaner();switch(syn){case 11:System.out.print("("+syn+" , ");//單詞符號:Digit digit*System.out.print(sum);System.out.println(")");break;case -1:System.out.println("error!");break;default:System.out.print("(");System.out.print(syn); System.out.print(" , ");String str=new String(token);System.out.print(str);System.out.println(")");}}while(syn!=0);}//掃描程序private static void scaner() throws IOException {
// 1、初始化for(int i=0;i<8;i++)token[i]=' ';
// 2、讀字母ch=prog.charAt(p++);while(ch==' '){//如果是空格,則取下一個字符ch=prog.charAt(p++);}
// 3、開始執(zhí)行掃描
// 1、是字母
// 讀標(biāo)識符,查保留字表
// 查到,換成屬性字表,寫到輸出流
// 沒查到, 查名表,換成屬性字,寫到輸出流if(ch>='a'&&ch<='z'){m=0;//獲取完整單詞while((ch>='a'&&ch<='z')||(ch>='0'&&ch<='9')){token[m++]=ch;ch=prog.charAt(p++);}token[m++]='\0';--p;syn=10;//單詞符號為letter(letter|digit)*//判斷是哪個關(guān)鍵字String newStr=new String(token);newStr=newStr.trim();//System.out.println("newStr:"+newStr);for(n=0;n<6;n++){//System.out.println("rwtable:"+rwtable[n]);if(newStr.equals(rwtable[n])){syn=n+1;System.out.println("syn 的值是:"+syn);break;}}token[m++]='\0';}
// 2、是數(shù)字
// 取數(shù)字,查常量表,換成屬性字表,寫到輸出流else if(ch>='0'&&ch<='9'){while(ch>='0'&&ch<='9'){sum=sum*10+ch-'0';ch=prog.charAt(p++);}--p;syn=11;//digitdigit*token[m++]='\0';}
// 3、是特殊符號
// 查特殊符號表,換成屬性字。寫到輸出流
// 4、錯誤error
// 4、是否分析結(jié)束
// 未結(jié)束,到2
// 結(jié)束,到出口else switch(ch){case'<':m=0;token[m++]=ch;ch=prog.charAt(p++);if(ch=='>'){syn=21;//<>}else if(ch=='='){syn=22;//<=token[m++]=ch;}else{syn=20;//<--p;}break;case'>':token[m++]=ch;ch=prog.charAt(p++);if(ch=='='){syn=24;//>=}else{syn=23;//>--p;}break;case':':token[m++]=ch;ch=prog.charAt(p++);if(ch=='='){syn=18;//:=token[m++]=ch;}else{syn=17;//:--p;}break;case'+':syn=13;token[0]=ch;token[1]='\0';break;case'-':syn=14;token[0]=ch;token[1]='\0';break;case'*':syn=15;token[0]=ch;token[1]='\0';break;case'/':syn=16;token[0]=ch;token[1]='\0';break;case'=':syn=25;token[0]=ch;token[1]='\0';break;case';':syn=26;token[0]=ch;token[1]='\0';break;case'(':syn=27;token[0]=ch;token[1]='\0';break;case')':syn=28;token[0]=ch;token[1]='\0';break;case'#':syn=0;token[0]=ch;token[1]='\0';break;default:syn=-1; }File txt=new File("src/nihao.txt");if(!txt.exists()){txt.createNewFile();}byte[] bytes=new byte[token.length];//定義一個長度與需要轉(zhuǎn)換的char數(shù)組相同的byte數(shù)組for(int i=0;i<bytes.length ;i++){//循環(huán)將char的每個元素轉(zhuǎn)換并存放在上面定義的byte數(shù)組中byte b=(byte)token[i];//將每個char轉(zhuǎn)換成bytebytes[i]=b;//保存到數(shù)組中}FileOutputStream fos;try {fos = new FileOutputStream(txt,true);fos.write(syn);fos.write(bytes);fos.close();} catch (Exception e) {e.printStackTrace();} }
}
?
?
文件 data.txt 中的內容為:
begin x:=9; if x>0 then x:=2*x+1/3;end #
?
程序執(zhí)行結(jié)果(控制臺輸出):
?
打開文件 src/data.txt 讀取內(nèi)容為: beginx:=9;ifx>0thenx:=2*x+1/3;end# syn 的值是:1 (1 , begin)
(10,x)
(18,:=)
(11,9)
(26,;)
syn的值是:2
(2,if)
(10,x)
(23,>)
(11,90)
syn的值是:3
(3,then)
(10,x)
(13,+)
(11,902)
(15,*)
(10,x)
(13,+)
(11,9021)
(16,)
(11,90213)
(26,;)
syn的值是:6
(6,end)
(0,#)
?
<!--EndFragment-->
?
與50位技術(shù)專家面對面 20年技術(shù)見證,附贈技術(shù)全景圖
總結(jié)
以上是生活随笔為你收集整理的一个简单词法分析器的实现代码(java实现)的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。