简单词法分析器实现
編寫分析器有兩種方法,一種是通過DFA對單詞進(jìn)行識別,二是通過直接編敲代碼進(jìn)行識別。
#include<stdio.h> #include<string.h> #include<stdlib.h> //用指針而不是二維數(shù)組表示。這樣就不用指定字符串長度,僅僅是不能改動指針?biāo)赶蜃址膬?nèi)容 char *key[]={"int","char","float","double","void","const","for","if","else","then","while","switch","break","main","return"}; char buffer[20];//存儲當(dāng)前識別出的單詞 char *identifier[50]; char *num[50]; int ident_num;//標(biāo)志符數(shù) int number;//數(shù)字?jǐn)?shù) int judgement_num(char c) {if (c >= '0' && c <= '9') return 0;else if (c == '.') return 1;return -1; } int judge(char c){if(c=='_') return 2;else if(c>='0'&&c<='9') return 1;else if(c>='a'&&c<='z'||c>='A'&&c<='Z') return 0;return -1; } //next_i,next_j分別代表著狀態(tài)轉(zhuǎn)換表move的當(dāng)前狀態(tài)與接收到的字符 //width代表每一狀態(tài)可能遇到的狀態(tài)數(shù),length代表終態(tài)集大小 int DFA(int begin,int move[],int width,int final[],int length,int(*judge)(char)){int len=0,next_i,next_j,tmp;char next_char;memset(buffer,0,sizeof(buffer));next_char=getchar();next_i=begin;while((next_j=judge(next_char))!=-1){tmp=next_i;next_i=move[next_i*width+next_j];if(next_i==-1){printf("move[%d][%d]has not next state\n",tmp,next_j);return -1;}buffer[len++]=next_char;next_char=getchar();}ungetc(next_char,stdin);buffer[len]='\0';for(int i=0;i<length;i++){if(next_i==final[i]) return 0;}return -1; }void is_comment(){char next_char;char tmp[5];memset(tmp,0,sizeof(tmp));tmp[0]=getchar();next_char=getchar();if(next_char=='*'){while((next_char=getchar())!=EOF){if(next_char=='*'&&getchar()=='/') return;}printf("The comment is error\n");}else if(next_char=='/'){while((next_char=getchar())!='\n');}else if(next_char=='='){printf("(/=,_)\n");}else{printf("(/,_)\n");ungetc(next_char,stdin);} } void is_other(){char next_char,c=getchar();if(c=='+'||c=='-'||c=='*'||c=='%'){next_char=getchar();if(next_char=='=') printf("(%c=,_)\n",c);else {printf("(%c,_)\n",c);ungetc(next_char,stdin);}}else if(c=='>'||c=='<'){next_char=getchar();if(next_char=='=') printf("(rlop,%c=)\n",c);else {printf("(rlop,%c)\n",c);ungetc(next_char,stdin);}}else if(c=='!'){next_char=getchar();if(next_char=='=') printf("(!=,_)\n");else {printf("(not,_)\n");ungetc(next_char,stdin);}}else if(c=='|'){next_char=getchar();if (next_char == '|')printf("(or,||)\n");else {ungetc(next_char, stdin);}}else if(c=='&'){next_char=getchar();if (next_char == '&')printf("(and,&&)\n");else {ungetc(next_char, stdin);}}else if(c=='='||c=='('||c==')'||c=='['||c==']'||c==';'||c==','||c=='{'||c=='}'){printf("(%c,_)\n",c);} } void is_digit(){int begin=0;int move[]={1,-1,1,2,2,-1};int final[]={1,2};int result=-1;result=DFA(begin,move,2,final,2,judgement_num);if(result==-1){printf("digit DFA error\n");exit(-1);}else if(result==0){//printf("%s\n",buffer);for(int i=0;i<number;i++){if(strcmp(buffer,num[i])==0){printf("(num,%s)\n",buffer);return;}}num[number]=(char*)malloc(sizeof(buffer));strcpy(num[number++],buffer);printf("(num,%s)\n",buffer);return;} } void is_letter(){int i;int begin=0;int move[] ={1,-1,1,1,1,1};int final[]={1,2};int result=-1;result=DFA(begin,move,3,final,1,judge);if(result==-1){printf("letter DFA error\n");exit(-1);}else if(result==0){int len=sizeof(key)/sizeof(char *);//如為keywordfor(i=0;i<len;i++){if(strcmp(buffer,key[i])==0){printf("(key,%s)\n",key[i]);return;}}//如為標(biāo)志符for(i=0;i<ident_num;i++){//在當(dāng)前標(biāo)志符表中已經(jīng)存在if(strcmp(buffer,identifier[i])==0){printf("(%id,%s)\n",identifier[i]);return;}}//如不存在,則寫入identifier[ident_num]=(char*)malloc(sizeof(buffer));strcmp(identifier[ident_num++],buffer);printf("(id,%s)\n",buffer);return;} }void init() {ident_num=0;number=0; } void work() {char c;while((c=getchar())!=EOF){ungetc(c,stdin);if(judge(c)==2||judge(c)==0) is_letter();else if(judge(c)==1) is_digit();else if(c=='/') is_comment();else is_other();} }int main() {freopen("in.txt","r",stdin);init();work();return 0; }
本程序採用DFA對單詞進(jìn)行識別。
DFA的實現(xiàn)方法。大概思想和書上一致,在程序中,則是用二維數(shù)組代表狀態(tài)轉(zhuǎn)換矩陣,用一維數(shù)組表示終態(tài)。
一個詞法編輯要實現(xiàn)的功能主要包含下面幾點:
可以識別標(biāo)識符、keyword、數(shù)字和運算符,對凝視進(jìn)行過濾。同一時候還能識別出程序錯誤。
使用說明:
本程序的輸入由當(dāng)前文件夾下的in.txt文件讀取輸入,輸出為一系列二元式#include<stdio.h> #include<string.h> #include<stdlib.h> //用指針而不是二維數(shù)組表示。這樣就不用指定字符串長度,僅僅是不能改動指針?biāo)赶蜃址膬?nèi)容 char *key[]={"int","char","float","double","void","const","for","if","else","then","while","switch","break","main","return"}; char buffer[20];//存儲當(dāng)前識別出的單詞 char *identifier[50]; char *num[50]; int ident_num;//標(biāo)志符數(shù) int number;//數(shù)字?jǐn)?shù) int judgement_num(char c) {if (c >= '0' && c <= '9') return 0;else if (c == '.') return 1;return -1; } int judge(char c){if(c=='_') return 2;else if(c>='0'&&c<='9') return 1;else if(c>='a'&&c<='z'||c>='A'&&c<='Z') return 0;return -1; } //next_i,next_j分別代表著狀態(tài)轉(zhuǎn)換表move的當(dāng)前狀態(tài)與接收到的字符 //width代表每一狀態(tài)可能遇到的狀態(tài)數(shù),length代表終態(tài)集大小 int DFA(int begin,int move[],int width,int final[],int length,int(*judge)(char)){int len=0,next_i,next_j,tmp;char next_char;memset(buffer,0,sizeof(buffer));next_char=getchar();next_i=begin;while((next_j=judge(next_char))!=-1){tmp=next_i;next_i=move[next_i*width+next_j];if(next_i==-1){printf("move[%d][%d]has not next state\n",tmp,next_j);return -1;}buffer[len++]=next_char;next_char=getchar();}ungetc(next_char,stdin);buffer[len]='\0';for(int i=0;i<length;i++){if(next_i==final[i]) return 0;}return -1; }void is_comment(){char next_char;char tmp[5];memset(tmp,0,sizeof(tmp));tmp[0]=getchar();next_char=getchar();if(next_char=='*'){while((next_char=getchar())!=EOF){if(next_char=='*'&&getchar()=='/') return;}printf("The comment is error\n");}else if(next_char=='/'){while((next_char=getchar())!='\n');}else if(next_char=='='){printf("(/=,_)\n");}else{printf("(/,_)\n");ungetc(next_char,stdin);} } void is_other(){char next_char,c=getchar();if(c=='+'||c=='-'||c=='*'||c=='%'){next_char=getchar();if(next_char=='=') printf("(%c=,_)\n",c);else {printf("(%c,_)\n",c);ungetc(next_char,stdin);}}else if(c=='>'||c=='<'){next_char=getchar();if(next_char=='=') printf("(rlop,%c=)\n",c);else {printf("(rlop,%c)\n",c);ungetc(next_char,stdin);}}else if(c=='!'){next_char=getchar();if(next_char=='=') printf("(!=,_)\n");else {printf("(not,_)\n");ungetc(next_char,stdin);}}else if(c=='|'){next_char=getchar();if (next_char == '|')printf("(or,||)\n");else {ungetc(next_char, stdin);}}else if(c=='&'){next_char=getchar();if (next_char == '&')printf("(and,&&)\n");else {ungetc(next_char, stdin);}}else if(c=='='||c=='('||c==')'||c=='['||c==']'||c==';'||c==','||c=='{'||c=='}'){printf("(%c,_)\n",c);} } void is_digit(){int begin=0;int move[]={1,-1,1,2,2,-1};int final[]={1,2};int result=-1;result=DFA(begin,move,2,final,2,judgement_num);if(result==-1){printf("digit DFA error\n");exit(-1);}else if(result==0){//printf("%s\n",buffer);for(int i=0;i<number;i++){if(strcmp(buffer,num[i])==0){printf("(num,%s)\n",buffer);return;}}num[number]=(char*)malloc(sizeof(buffer));strcpy(num[number++],buffer);printf("(num,%s)\n",buffer);return;} } void is_letter(){int i;int begin=0;int move[] ={1,-1,1,1,1,1};int final[]={1,2};int result=-1;result=DFA(begin,move,3,final,1,judge);if(result==-1){printf("letter DFA error\n");exit(-1);}else if(result==0){int len=sizeof(key)/sizeof(char *);//如為keywordfor(i=0;i<len;i++){if(strcmp(buffer,key[i])==0){printf("(key,%s)\n",key[i]);return;}}//如為標(biāo)志符for(i=0;i<ident_num;i++){//在當(dāng)前標(biāo)志符表中已經(jīng)存在if(strcmp(buffer,identifier[i])==0){printf("(%id,%s)\n",identifier[i]);return;}}//如不存在,則寫入identifier[ident_num]=(char*)malloc(sizeof(buffer));strcmp(identifier[ident_num++],buffer);printf("(id,%s)\n",buffer);return;} }void init() {ident_num=0;number=0; } void work() {char c;while((c=getchar())!=EOF){ungetc(c,stdin);if(judge(c)==2||judge(c)==0) is_letter();else if(judge(c)==1) is_digit();else if(c=='/') is_comment();else is_other();} }int main() {freopen("in.txt","r",stdin);init();work();return 0; }
總結(jié)
- 上一篇: 21、Java并发性和多线程-Java中
- 下一篇: 合格前端系列第五弹- Virtual D