當前位置：首頁 > 编程资源 > 编程问答 > 内容正文

编程问答

一个简单词法分析器的实现代码（java实现）

發布時間：2024/4/17 编程问答 33 豆豆

生活随笔收集整理的這篇文章主要介紹了一个简单词法分析器的实现代码（java实现），小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

http://www.cnblogs.com/xuqiang/archive/2010/09/21/1953501.html

Main.java

/* Main program. */
import java.io.*;
import lexer.*;

/**
 * Command-line driver for the lexer: scans until the lexer reports that its
 * input is exhausted, then saves the token list and the symbol table.
 */
public class Main {

    /**
     * Runs one complete lexing pass.
     *
     * @param args unused
     * @throws IOException if scanning or saving the results fails
     */
    public static void main(String[] args) throws IOException {
        Lexer lexer = new Lexer();

        // getReaderState() returns true once the end of the input was reached.
        while (!lexer.getReaderState()) {
            lexer.scan();
        }

        /* Save the collected information. */
        lexer.saveTokens();
        lexer.saveSymbolsTable();
    }
}

Lexer.java

package lexer;

import java.io.*;
import java.util.*;

import symbols.*;

/**
 * Hand-written lexical analyzer.
 *
 * <p>Characters are read one at a time from the file "輸入.txt" and grouped
 * into tokens by {@link #scan()}. Every token's text is appended to an
 * internal token list, and every newly seen identifier is recorded in a
 * symbol table; both can be written to disk with {@link #saveTokens()} and
 * {@link #saveSymbolsTable()}.
 */
public class Lexer {

    /** Value held by {@code peek} once the input is exhausted: (char) -1 == 0xffff. */
    private static final char END_OF_INPUT = (char) -1;

    /** Current line number; incremented for every '\n' skipped by scan(). */
    public static int line = 1;

    /** Lookahead character; a blank means that nothing is buffered. */
    char peek = ' ';

    /** Reserved words, type names and the identifiers seen so far, keyed by lexeme. */
    Hashtable<String, Word> words = new Hashtable<String, Word>();

    /** Symbol table: identifier token -> description. */
    private Hashtable<Token, String> table = new Hashtable<Token, String>();

    /** Text of every token, in the order it was scanned. */
    private List<String> tokens = new LinkedList<String>();

    /** Input source; null when the file could not be opened or was already closed. */
    BufferedReader reader = null;

    /** True once the end of the input has been reached. */
    private Boolean isEnd = false;

    /**
     * Reports whether the end of the input has been reached.
     *
     * @return true when there is nothing left to read
     */
    public Boolean getReaderState() {
        return this.isEnd;
    }

    /**
     * Writes the symbol table to "符號表.txt", one "token / description" line
     * per entry, after a two-line header.
     *
     * @throws IOException if the file cannot be written
     */
    public void saveSymbolsTable() throws IOException {
        FileWriter writer = new FileWriter("符號表.txt");
        try {
            writer.write("[符號] [符號類型信息]\n");
            writer.write("\r\n");
            for (Map.Entry<Token, String> entry : table.entrySet()) {
                writer.write(entry.getKey() + "\t\t\t" + entry.getValue() + "\r\n");
            }
        } finally {
            // close() also flushes; the file handle is released even on failure.
            writer.close();
        }
    }

    /**
     * Writes the text of every scanned token to "Tokens表.txt", one per line,
     * after a two-line header.
     *
     * @throws IOException if the file cannot be written
     */
    public void saveTokens() throws IOException {
        FileWriter writer = new FileWriter("Tokens表.txt");
        try {
            writer.write("[符號] \n");
            writer.write("\r\n");
            // Iterate instead of calling get(i): indexed access on a LinkedList is O(n).
            for (String tok : tokens) {
                writer.write(tok + "\r\n");
            }
        } finally {
            writer.close();
        }
    }

    /** Registers a word in the lookup table under its lexeme. */
    void reserve(Word w) {
        words.put(w.lexme, w);
    }

    /**
     * Opens the input file and pre-loads the keywords and type names into
     * the word table. If the file cannot be opened the error is printed and
     * the lexer starts out in the "end of input" state, so callers that loop
     * on {@link #getReaderState()} finish immediately instead of failing on
     * a null reader.
     */
    public Lexer() {
        /* Open the input file. */
        try {
            reader = new BufferedReader(new FileReader("輸入.txt"));
        } catch (IOException e) {
            System.out.print(e);
            this.isEnd = true;
        }

        /* Keywords. */
        this.reserve(new Word("if", Tag.IF));
        this.reserve(new Word("then", Tag.THEN));
        this.reserve(new Word("else", Tag.ELSE));
        this.reserve(new Word("while", Tag.WHILE));
        this.reserve(new Word("do", Tag.DO));

        /* Boolean literals and type names. */
        this.reserve(Word.True);
        this.reserve(Word.False);
        this.reserve(Type.Int);
        this.reserve(Type.Char);
        this.reserve(Type.Bool);
        this.reserve(Type.Float);
    }

    /**
     * Reads the next character into {@code peek}. At the end of the input
     * {@code peek} becomes 0xffff, the end flag is set and the reader is
     * closed; later calls keep reporting the end of the input.
     *
     * @throws IOException if reading or closing the input fails
     */
    public void readch() throws IOException {
        if (reader == null) {
            // Never opened, or already closed at the end of the input.
            peek = END_OF_INPUT;
            this.isEnd = true;
            return;
        }

        int next = reader.read();
        if (next == -1) {
            peek = END_OF_INPUT;
            this.isEnd = true;
            reader.close();
            reader = null;
            return;
        }
        peek = (char) next;
    }

    /**
     * Reads the next character and checks whether it equals {@code ch}.
     * On a match the lookahead is cleared (the character is consumed);
     * otherwise it stays in {@code peek} for the next scan.
     *
     * @param ch the expected character
     * @return true if the character read equals {@code ch}
     * @throws IOException if reading fails
     */
    public Boolean readch(char ch) throws IOException {
        readch();
        if (this.peek != ch) {
            return false;
        }
        this.peek = ' ';
        return true;
    }

    /**
     * Scans and returns the next token, recording its text in the token list.
     * At the end of the input a token with tag 0xffff is returned and nothing
     * is recorded.
     *
     * @return the next token
     * @throws IOException if reading the input fails
     */
    public Token scan() throws IOException {
        /* Skip whitespace, counting lines. '\r' is skipped so CRLF input works. */
        for (;; readch()) {
            if (peek == ' ' || peek == '\t' || peek == '\r') {
                continue;
            } else if (peek == '\n') {
                line = line + 1;
            } else {
                break;
            }
        }

        /* One-character operators that may be followed by '='. */
        switch (peek) {
            case '=':
                return relationalOperator("=", Word.eq);
            case '>':
                return relationalOperator(">", Word.ge);
            case '<':
                return relationalOperator("<", Word.le);
            case '!':
                return relationalOperator("!", Word.ne);
            default:
                break;
        }

        if (Character.isDigit(peek)) {
            return scanNumber();
        }
        if (Character.isLetter(peek)) {
            return scanWord();
        }

        /* Any other character is returned as a token of its own. */
        Token tok = new Token(peek);
        if (peek != END_OF_INPUT) {
            tokens.add(tok.toString());
        }
        peek = ' ';
        return tok;
    }

    /**
     * Finishes scanning an operator whose first character is in {@code peek}:
     * returns {@code compound} when the next character is '=', otherwise a
     * plain token for the single character.
     */
    private Token relationalOperator(String single, Word compound) throws IOException {
        if (readch('=')) {
            tokens.add(compound.toString());
            return compound;
        }
        tokens.add(single);
        return new Token(single.charAt(0));
    }

    /** Scans a run of decimal digits starting at {@code peek} into a Num token. */
    private Token scanNumber() throws IOException {
        int value = 0;
        do {
            value = 10 * value + Character.digit(peek, 10);
            readch();
        } while (Character.isDigit(peek));

        Num n = new Num(value);
        tokens.add(n.toString());
        return n;
    }

    /**
     * Scans a letter followed by letters or digits. Returns the registered
     * Word when the lexeme is already known (keyword, type name or an
     * identifier seen before); otherwise creates a new identifier and records
     * it in both the word table and the symbol table.
     */
    private Token scanWord() throws IOException {
        StringBuilder lexeme = new StringBuilder();
        do {
            lexeme.append(peek);
            readch();
        } while (Character.isLetterOrDigit(peek));

        String s = lexeme.toString();
        Word w = words.get(s);
        if (w != null) {
            tokens.add(w.toString());
            return w;
        }

        /* Not known yet: a new identifier. */
        w = new Word(s, Tag.ID);
        tokens.add(w.toString());
        table.put(w, "id");
        words.put(s, w);
        return w;
    }
}

Num.java

package lexer;public class Num extends Token{public final int value;public Num(int v) {super(Tag.NUM);this.value = v;}public String toString() {return "" + value;} }

Tag.java

package lexer;

/**
 * Integer tags identifying the kind of a token. The values start at 256 and
 * are consecutive.
 */
public class Tag {
    public final static int
        AND = 256,
        BASIC = 257,
        BREAK = 258,
        DO = 259,
        ELSE = 260,
        EQ = 261, /* == */
        FALSE = 262,
        GE = 263,
        ID = 264,
        IF = 265,
        INDEX = 266,
        LE = 267,
        MINUS = 268,
        NE = 269,
        NUM = 270,
        OR = 271,
        REAL = 272,
        TEMP = 273,
        TRUE = 274,
        WHILE = 275,
        /* added later */
        THEN = 276;
}

Token.java

package lexer;public class Token {public final int tag;public Token(int t) {this.tag = t;}public String toString() {return "" + (char)tag;}public static void main(String[] args) {Token tok = new Token('a');System.out.println(tok);} }

Word.java

/*
 * Class Word manages reserved words, identifiers and compound lexical
 * elements such as &&.
 */
package lexer;

/**
 * A token that has a lexeme (its source text) in addition to its tag.
 */
public class Word extends Token {

    /** Source text of the word. */
    public String lexme = "";

    /**
     * Creates a word.
     *
     * @param s the lexeme
     * @param t the tag
     */
    public Word(String s, int t) {
        super(t);
        this.lexme = s;
    }

    /** Returns the lexeme. */
    public String toString() {
        return this.lexme;
    }

    /* Predefined words shared by the whole program. */
    public static final Word and = new Word("&&", Tag.AND);
    public static final Word or = new Word("||", Tag.OR);
    public static final Word eq = new Word("==", Tag.EQ);
    public static final Word ne = new Word("!=", Tag.NE);
    public static final Word le = new Word("<=", Tag.LE);
    public static final Word ge = new Word(">=", Tag.GE);
    public static final Word minus = new Word("minus", Tag.MINUS);
    public static final Word True = new Word("true", Tag.TRUE);
    public static final Word False = new Word("false", Tag.FALSE);
    public static final Word temp = new Word("t", Tag.TEMP);
}

Type.java

/** 說明數(shù)據(jù)類型*/ package symbols;import lexer.*;public class Type extends Word{public Type(String s, int tag) {super(s, tag);}public static final TypeInt = new Type("int", Tag.BASIC),Float = new Type("float", Tag.BASIC),Char = new Type ("char", Tag.BASIC),Bool = new Type("bool", Tag.BASIC);}

＝＝＝＝＝＝＝＝＝＝＝＝

http://freewxy.iteye.com/blog/870016

什么是詞法？

所謂詞法：源代碼由字符流組成，字符流中包括關鍵字、變量名、方法名、括號等符號。其中變量名必須是不包含標點符號、且不以數字開頭的由數字與字母組成的字符串，括號要成對出現，等等，這些規則就是詞法。

什么是詞法分析？

詞法分析階段是編譯過程的第一個階段。這個階段的任務是從左到右一個字符一個字符地讀入源程序，即對構成源程序的字符流進行掃描，然後根據構詞規則識別單詞（也稱單詞符號或符號）。

待分析的簡單語言的詞法：

1) 關鍵字

   begin if then while do end

2) 運算符和界符

   := + - * / < <= > >= <> = ; ( ) #

3) 其他單詞是標識符(ID)和整型常數(NUM)，通過以下正規式定義：

   ID = letter (letter | digit)*

   NUM = digit digit*

4) 空格由空白、制表符和換行符組成。空格一般用來分隔ID、NUM、運算符、界符和關鍵字，詞法分析階段通常被忽略。

各種單詞符號對應的種別編碼

單詞符號	種別碼	單詞符號	種別碼
begin	1	:	17
if	2	:=	18
then	3	<	20
while	4	<>	21
do	5	<=	22
end	6	>	23
letter(letter\|digit)*	10	>=	24
digitdigit*	11	=	25
+	13	;	26
-	14	(	27
*	15	)	28
/	16	#	0

詞法分析程序的功能：

輸入：所給文法的源程序字符串

輸出：二元組（syn, token或sum）構成的序列。

syn為單詞種別碼；

token為存放的單詞自身字符串；

sum為整型常數。

例如：對源程序 begin x:=9;if x>0 then x:=2*x+1/3;end# 經詞法分析後輸出如下序列：(1,begin) (10,x) (18,:=) (11,9) (26,;) (2,if) ……

流程圖：

?源碼：

Java代碼 ?

public?class?詞法分析?{ ??

?? ??

????/*??初始化數(shù)據(jù) ?

??????syn為單詞種別碼； ?

??????token為存放的單詞自身字符串； ?

??????sum為整型常數(shù)。 ?

?????*/??

????static?String?prog; ??

????static?char?ch; ??

????static?char[]token=new?char[8]; ??

????static?int?syn,p,m,n,sum; ??

????static?//關(guān)鍵字表的初值??

????String[]?rwtable={"begin","if","then","while","do","end"}; ??

???? ??

????/** ?

?????*?@param?args ?

?????*?@throws?IOException? ?

?????*/??

????public?static?void?main(String[]?args)?throws?IOException?{ ??

?????????//1、輸入字符串 ??

?????????//prog="begin??x:=9;?if?x>0??then???x:=2*x+1/3;end?#";??

?????????//1、從文件中讀取字符串 ??

????????prog=dofile.readFileByChars("src/data.txt"); ??

?????????//2、掃描輸出 ??

?????????p=0; ??

?????????do{ ??

?????????????scaner(); ??

?????????????switch(syn){ ??

?????????????case?11:System.out.print("("+syn+"?,?");//單詞符號：Digit?digit*??

?????????????????????System.out.print(sum); ??

?????????????????????System.out.println(")"); ??

????????????????break; ??

?????????????case?-1:System.out.println("error!"); ??

????????????????break; ??

?????????????default: ??

?????????????????????System.out.print("("); ??

?????????????????????System.out.print(syn);? ??

?????????????????????System.out.print("?,?"); ??

?????????????????????String?str=new?String(token); ??

?????????????????????System.out.print(str); ??

?????????????????????System.out.println(")"); ??

?????????????} ??

?????????}while(syn!=0); ??

????????? ??

????} ??

????//掃描程序 ??

????private?static?void?scaner()?throws?IOException?{??????? ??

//??????1、初始化 ??

????????for(int?i=0;i<8;i++) ??

????????????token[i]='?'; ??

//??????2、讀字母 ??

????????ch=prog.charAt(p++); ??

????????while(ch=='?'){//如果是空格，則取下一個字符??

????????????ch=prog.charAt(p++); ??

????????} ??

//??????3、開始執(zhí)行掃描 ??

//??????????1、是字母 ??

//?????????????????????讀標(biāo)識符，查保留字表 ??

//?????????????????????????查到，換成屬性字表，寫到輸出流??

//?????????????????????????沒查到，?查名表，換成屬性字，寫到輸出流??

????????if(ch>='a'&&ch<='z'){ ??

????????????m=0; ??

????????????//獲取完整單詞 ??

????????????while((ch>='a'&&ch<='z')||(ch>='0'&&ch<='9')){ ??

????????????????token[m++]=ch; ??

????????????????ch=prog.charAt(p++); ??

????????????} ??

????????????token[m++]='\0'; ??

????????????--p; ??

????????????syn=10;//單詞符號為letter(letter|digit)*??

????????????//判斷是哪個關(guān)鍵字 ??

????????????String?newStr=new?String(token); ??

????????????newStr=newStr.trim(); ??

????????????//System.out.println("newStr:"+newStr);??

????????????for(n=0;n<6;n++){ ??

????????????????//System.out.println("rwtable:"+rwtable[n]);??

????????????????if(newStr.equals(rwtable[n])){ ??

????????????????????syn=n+1; ??

????????????????????System.out.println("syn?的值是："+syn); ??

????????????????????break; ??

????????????????} ??

????????????} ??

????????????token[m++]='\0'; ??

????????} ??

//??????????2、是數(shù)字 ??

//?????????????????????????取數(shù)字，查常量表，換成屬性字表，寫到輸出流??

????????else?if(ch>='0'&&ch<='9'){ ??

????????????while(ch>='0'&&ch<='9'){ ??

????????????????sum=sum*10+ch-'0'; ??

????????????????ch=prog.charAt(p++); ??

????????????} ??

????????????--p; ??

????????????syn=11;//digitdigit*??

????????????token[m++]='\0'; ??

????????} ??

//??????????3、是特殊符號 ??

//?????????????????????????查特殊符號表，換成屬性字。寫到輸出流??

//??????????4、錯誤error ??

//??????4、是否分析結(jié)束 ??

//??????????????未結(jié)束，到2 ??

//??????????????結(jié)束，到出口 ??

????????else? ??

????????switch(ch){ ??

????????????case'<': ??

????????????????m=0; ??

????????????????token[m++]=ch; ??

????????????????ch=prog.charAt(p++); ??

????????????????if(ch=='>'){ ??

????????????????????syn=21;//<>??

????????????????} ??

????????????????else?if(ch=='='){ ??

????????????????????syn=22;//<=??

????????????????????token[m++]=ch; ??

????????????????} ??

????????????????else{ ??

????????????????????syn=20;//<??

????????????????????--p; ??

????????????????} ??

????????????break; ??

????????????case'>': ??

????????????????token[m++]=ch; ??

????????????????ch=prog.charAt(p++); ??

????????????????if(ch=='='){ ??

????????????????????syn=24;//>=??

????????????????} ??

????????????????else{ ??

????????????????????syn=23;//>??

????????????????????--p; ??

????????????????} ??

????????????????break; ??

????????????case':': ??

????????????????token[m++]=ch; ??

????????????????ch=prog.charAt(p++); ??

????????????????if(ch=='='){ ??

????????????????????syn=18;//:=??

????????????????????token[m++]=ch; ??

????????????????} ??

????????????????else{ ??

????????????????????syn=17;//:??

????????????????????--p; ??

????????????????} ??

????????????break; ??

????????????case'+': ??

????????????????syn=13;token[0]=ch;token[1]='\0';break; ??

????????????case'-': ??

????????????????syn=14;token[0]=ch;token[1]='\0';break; ??

????????????case'*': ??

????????????????syn=15;token[0]=ch;token[1]='\0';break; ??

????????????case'/': ??

????????????????syn=16;token[0]=ch;token[1]='\0';break; ??

????????????case'=': ??

????????????????syn=25;token[0]=ch;token[1]='\0';break; ??

????????????case';': ??

????????????????syn=26;token[0]=ch;token[1]='\0';break; ??

????????????case'(': ??

????????????????syn=27;token[0]=ch;token[1]='\0';break; ??

????????????case')': ??

????????????????syn=28;token[0]=ch;token[1]='\0';break; ??

????????????case'#': ??

????????????????syn=0;token[0]=ch;token[1]='\0';break; ??

????????????default: ??

????????????????syn=-1;? ??

???????????????? ??

????????} ??

???????? ??

????????File?txt=new?File("src/nihao.txt"); ??

???????????if(!txt.exists()){ ??

???????????????txt.createNewFile(); ??

???????????} ??

???????????byte[]?bytes=new?byte[token.length];//定義一個長度與需要轉(zhuǎn)換的char數(shù)組相同的byte數(shù)組??

???????????for(int?i=0;i<bytes.length?;i++){//循環(huán)將char的每個元素轉(zhuǎn)換并存放在上面定義的byte數(shù)組中??

???????????????byte?b=(byte)token[i];//將每個char轉(zhuǎn)換成byte??

???????????????bytes[i]=b;//保存到數(shù)組中 ??

???????????} ??

???????????FileOutputStream?fos; ??

????????try?{ ??

????????????fos?=?new?FileOutputStream(txt,true); ??

????????????fos.write(syn); ??

????????????fos.write(bytes); ??

???????????? ??

???????????????fos.close(); ??

????????}?catch?(Exception?e)?{ ??

????????????e.printStackTrace(); ??

????????}?? ??

????} ??

}??

public class 詞法分析 {/* 初始化數(shù)據(jù)syn為單詞種別碼；token為存放的單詞自身字符串；sum為整型常數(shù)。*/static String prog;static char ch;static char[]token=new char[8];static int syn,p,m,n,sum;static //關(guān)鍵字表的初值String[] rwtable={"begin","if","then","while","do","end"};/*** @param args* @throws IOException */public static void main(String[] args) throws IOException {//1、輸入字符串//prog="begin x:=9; if x>0 then x:=2*x+1/3;end #";//1、從文件中讀取字符串prog=dofile.readFileByChars("src/data.txt");//2、掃描輸出p=0;do{scaner();switch(syn){case 11:System.out.print("("+syn+" , ");//單詞符號：Digit digit*System.out.print(sum);System.out.println(")");break;case -1:System.out.println("error!");break;default:System.out.print("(");System.out.print(syn); System.out.print(" , ");String str=new String(token);System.out.print(str);System.out.println(")");}}while(syn!=0);}//掃描程序private static void scaner() throws IOException { // 1、初始化for(int i=0;i<8;i++)token[i]=' '; // 2、讀字母ch=prog.charAt(p++);while(ch==' '){//如果是空格，則取下一個字符ch=prog.charAt(p++);} // 3、開始執(zhí)行掃描 // 1、是字母 // 讀標(biāo)識符，查保留字表 // 查到，換成屬性字表，寫到輸出流 // 沒查到，查名表，換成屬性字，寫到輸出流if(ch>='a'&&ch<='z'){m=0;//獲取完整單詞while((ch>='a'&&ch<='z')||(ch>='0'&&ch<='9')){token[m++]=ch;ch=prog.charAt(p++);}token[m++]='\0';--p;syn=10;//單詞符號為letter(letter|digit)*//判斷是哪個關(guān)鍵字String newStr=new String(token);newStr=newStr.trim();//System.out.println("newStr:"+newStr);for(n=0;n<6;n++){//System.out.println("rwtable:"+rwtable[n]);if(newStr.equals(rwtable[n])){syn=n+1;System.out.println("syn 的值是："+syn);break;}}token[m++]='\0';} // 2、是數(shù)字 // 取數(shù)字，查常量表，換成屬性字表，寫到輸出流else if(ch>='0'&&ch<='9'){while(ch>='0'&&ch<='9'){sum=sum*10+ch-'0';ch=prog.charAt(p++);}--p;syn=11;//digitdigit*token[m++]='\0';} // 3、是特殊符號 // 查特殊符號表，換成屬性字。寫到輸出流 // 4、錯誤error // 4、是否分析結(jié)束 // 未結(jié)束，到2 // 結(jié)束，到出口else switch(ch){case'<':m=0;token[m++]=ch;ch=prog.charAt(p++);if(ch=='>'){syn=21;//<>}else 
if(ch=='='){syn=22;//<=token[m++]=ch;}else{syn=20;//<--p;}break;case'>':token[m++]=ch;ch=prog.charAt(p++);if(ch=='='){syn=24;//>=}else{syn=23;//>--p;}break;case':':token[m++]=ch;ch=prog.charAt(p++);if(ch=='='){syn=18;//:=token[m++]=ch;}else{syn=17;//:--p;}break;case'+':syn=13;token[0]=ch;token[1]='\0';break;case'-':syn=14;token[0]=ch;token[1]='\0';break;case'*':syn=15;token[0]=ch;token[1]='\0';break;case'/':syn=16;token[0]=ch;token[1]='\0';break;case'=':syn=25;token[0]=ch;token[1]='\0';break;case';':syn=26;token[0]=ch;token[1]='\0';break;case'(':syn=27;token[0]=ch;token[1]='\0';break;case')':syn=28;token[0]=ch;token[1]='\0';break;case'#':syn=0;token[0]=ch;token[1]='\0';break;default:syn=-1; }File txt=new File("src/nihao.txt");if(!txt.exists()){txt.createNewFile();}byte[] bytes=new byte[token.length];//定義一個長度與需要轉(zhuǎn)換的char數(shù)組相同的byte數(shù)組for(int i=0;i<bytes.length ;i++){//循環(huán)將char的每個元素轉(zhuǎn)換并存放在上面定義的byte數(shù)組中byte b=(byte)token[i];//將每個char轉(zhuǎn)換成bytebytes[i]=b;//保存到數(shù)組中}FileOutputStream fos;try {fos = new FileOutputStream(txt,true);fos.write(syn);fos.write(bytes);fos.close();} catch (Exception e) {e.printStackTrace();} } }

文件data.txt中的內容為：

  begin  x:=9; if x>0  then   x:=2*x+1/3;end #

程序執(zhí)行結(jié)果(控制臺輸出)：

打開文件 src/data.txt 讀取內(nèi)容為：
beginx:=9;ifx>0thenx:=2*x+1/3;end#
syn 的值是：1
(1 , begin)

(10,x)

(18,:=)

(11,9)

(26,;)

syn的值是：2

(2,if)

(10,x)

(23,>)

(11,90)

syn的值是：3

(3,then)

(10,x)

(13,+)

(11,902)

(15,*)

(10,x)

(13,+)

(11,9021)

(16,)

(11,90213)

(26,;)

syn的值是：6

(6,end)

(0,#)

與50位技術(shù)專家面對面20年技術(shù)見證，附贈技術(shù)全景圖

總結

以上是生活随笔為你收集整理的一个简单词法分析器的实现代码（java实现）的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： Java语言语法语义分析器设计与实现
下一篇：利用状态图实现词法分析

3atv精品不卡视频,97人人超碰国产精品最新,中文字幕av一区二区三区人妻少妇,久久久精品波多野结衣,日韩一区二区三区精品

编程问答

一个简单词法分析器的实现代码（java实现）

總結(jié)