|
首先,先说明,我也是第一次贴自己写的东西,希望有相关经验和兴趣的爱好者一起探讨一下, 欢迎批评指正啊, email: [email protected]
我在做一个project, 这是第一步, 就是写一个.l 的词法分析文件, 用来抓取源程序中的Token; 下一步是语法分析(syntax analysis), 然后进行语义分析(semantic analysis), 最后优化代码, 并生成汇编代码。
我先把第一步贴出来,有兴趣的,一起探讨一下。
呵呵, 我自己定义的语法,和C差不多,只是比C简单一些,也只有简单的功能,可以实现函数,显示,循环(for, while),流控制(if,switch), 只三种数据类型int, char,float。
文件名:C-.l
[code:1]
/*
* C-like COMPILER
* Project Part One
*
* Author: Yinghao Qin
* Date: 3th 04 2004
*
*/
/* definitions */
/*
 * Shared constants, global state and forward declarations for the scanner.
 * NOTE(review): in a flex definitions section C declarations are only copied
 * to the generated scanner when they are indented or enclosed in %{ ... %};
 * confirm the real file does one of the two.
 */
#define MaxSymbols 128      /* most identifiers the symbol table will store */
#define MaxTableIndex 227   /* number of hash-table slots (a prime larger than MaxSymbols) */
#define MaxStrLen 255       /* bytes per slot, including the terminating '\0' */

int num_ids = 0;                        /* identifiers counted so far by add2table() */
char symbol[MaxTableIndex][MaxStrLen];  /* use hash table storing identifiers */

/* use linked list storing literal strings */
struct literalString {
    char *start;                    /* text of the literal, without the quotes */
    struct literalString *next;     /* next node, NULL at the tail */
};
typedef struct literalString ls;
ls *stringHead, *stringTail;        /* first and last node of the string list */

/*
 * Forward declarations: the rules and main() call these helpers before their
 * definitions, which would otherwise rely on implicit declarations.
 */
int blcomment(void);
int initHashTable(void);
int add2table(void);
int getIndex(char *str);
int getHashIndex(char *str);
char *getMem(char *str);
int saveString(void);
int displayString(void);
int printSymbolTable(void);
%{
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
%}
/*
 * Named patterns used by the rules section.
 *
 * linecomment: the trailing newline is optional so that a comment on the last
 *              line of a file without a final newline is still recognised.
 * string:      stops at the first unescaped double quote instead of the last
 *              one on the line, so two literals on one line stay two tokens;
 *              a backslash escapes the character that follows it.
 * NOTE(review): the optional sign is part of integer and float, so with
 *              longest-match scanning "a-1" yields ID followed by a signed
 *              INT_VAL rather than ID, '-', INT_VAL; confirm the parser
 *              expects that.
 */
eol "\n"
linecomment "//"[^\n]*{eol}?
blockcomment "/*"
letter [A-Za-z]
string \"([^"\\\n]|\\.)*\"
digit [0-9]
integer [\+-]?{digit}+
float [\+-]?{digit}+\.{digit}+
charactor \'({digit}|{letter})\'
identifier {letter}(_|{letter}|{digit})*
%%
    /* pattern action */
    /* comments like these need to be indented with whitespace */
    /*
     * Every rule prints the name of the token it matched. Keywords are listed
     * before {identifier} so that, for equal-length matches, the keyword rule
     * wins. The '%' operator is printed with "%%" because a lone '%' is not a
     * valid printf conversion.
     */
{linecomment}   { printf(" LINECOMMENT\n"); }
{blockcomment}  { printf(" BLOCKCOMMENT"); blcomment(); }
{string}        { printf(" STRING"); saveString(); }
{integer}       { printf(" INT_VAL"); }
{float}         { printf(" FLOAT_VAL"); }
{charactor}     { printf(" CHAR_VAL"); }
"{"             { printf(" L_BRACE\n"); }
"}"             { printf(" R_BRACE\n"); }
"("             { printf(" L_BRACKET"); }
")"             { printf(" R_BRACKET\n"); }
"int"           { printf(" INT"); }
"float"         { printf(" FLOAT"); }
"char"          { printf(" CHAR"); }
"if"            { printf(" IF"); }
"else"          { printf(" ELSE"); }
"switch"        { printf(" SWITCH"); }
"case"          { printf(" CASE"); }
"default"       { printf(" DEFAULT"); }
"break"         { printf(" BREAK"); }
"continue"      { printf(" CONTINUE"); }
"for"           { printf(" FOR"); }
"do"            { printf(" DO"); }
"while"         { printf(" WHILE"); }
"main"          { printf(" MAIN"); }
"printf"        { printf(" PRINTF"); }
"goto"          { printf(" GOTO"); }
"sizeof"        { printf(" SIZEOF"); }
"return"        { printf(" RETURN"); }
">>="           { printf(" RIGHT_ASSIGN"); }
"<<="           { printf(" LEFT_ASSIGN"); }
"+="            { printf(" ADD_ASSIGN"); }
"-="            { printf(" SUB_ASSIGN"); }
"*="            { printf(" MUL_ASSIGN"); }
"/="            { printf(" DIV_ASSIGN"); }
"%="            { printf(" MOD_ASSIGN"); }
"&="            { printf(" AND_ASSIGN"); }
"^="            { printf(" XOR_ASSIGN"); }
"|="            { printf(" OR_ASSIGN"); }
">>"            { printf(" RIGHT_OP"); }
"<<"            { printf(" LEFT_OP"); }
"++"            { printf(" INC_OP"); }
"--"            { printf(" DEC_OP"); }
"&&"            { printf(" AND_OP"); }
"||"            { printf(" OR_OP"); }
"<="            { printf(" LE_OP"); }
">="            { printf(" GE_OP"); }
"=="            { printf(" EQ_OP"); }
"!="            { printf(" NE_OP"); }
";"             { printf(" ;\n"); }
"."             { printf(" ."); }
"&"             { printf(" &"); }
"!"             { printf(" !"); }
"-"             { printf(" -"); }
"+"             { printf(" +"); }
"*"             { printf(" *"); }
"/"             { printf(" /"); }
"%"             { printf(" %%"); }
"<"             { printf(" <"); }
">"             { printf(" >"); }
"^"             { printf(" ^"); }
"|"             { printf(" |"); }
"="             { printf(" ="); }
"?"             { printf(" ?"); }
":"             { printf(" :"); }
{identifier}    { printf(" ID"); add2table(); }
","             { printf(" ,"); }
[ \t\n]+        /* eat up whitespace */
.               { printf(" Unrecognized character:%s\n", yytext); }
%%
/*user supplied functions */
/*
 * Entry point of the scanner.
 *
 * argv[1] (optional) names the source file to scan; without it the scanner
 * reads standard input. A second argument of "-s" additionally prints the
 * symbol table after scanning.
 *
 * Returns 0 on success, 1 when the input file cannot be opened.
 */
int main(int argc, char **argv)
{
    ++argv, --argc; /* skip over program name */
    if (argc > 0) {
        yyin = fopen(argv[0], "r");
        if (yyin == NULL) {
            /* Without this check the failure would go unreported. */
            perror(argv[0]);
            return 1;
        }
    } else {
        printf("Default usage: ./a.out test.cc -s\n");
        yyin = stdin;
    }

    stringHead = NULL;
    stringTail = NULL;
    initHashTable();

    yylex(); /* call the flex-generated main function */
    if (yyin != NULL && yyin != stdin) {
        fclose(yyin);
    }

    displayString(); /* display literal string */
    if (argc > 1 && 0 == strcmp(argv[1], "-s")) {
        printSymbolTable();
    }
    printf("\nTip:using '-s' to show symbol table(eg: \"./a.out test.cc -s\")\n\n");
    return 0;
}
/*
 * Consume the body of a block comment whose opening delimiter has already
 * been matched by the scanner, up to and including the closing star-slash
 * pair, then print a newline.
 *
 * End of input is detected both as 0 and as EOF, so an unterminated comment
 * ends the loop (with a diagnostic on stderr) instead of spinning forever.
 *
 * Always returns 0.
 */
int blcomment()
{
    int prev = 0;   /* previously consumed character */
    int c;

    for (;;) {
        c = input();
        if (c == 0 || c == EOF) {
            fprintf(stderr, "\nError: unterminated block comment\n");
            break;
        }
        if (prev == '*' && c == '/') {
            break;  /* found '/' right after '*': comment ends */
        }
        prev = c;
    }
    putchar('\n');
    return 0;
}
/*
 * Mark every slot of the symbol table as free by storing '*' in the first
 * byte of each entry. Always returns 0.
 */
int initHashTable()
{
    int slot;

    slot = 0;
    while (slot < MaxTableIndex) {
        symbol[slot][0] = '*';
        slot++;
    }
    return 0;
}
/* add identifier into the symbol table */
int add2table()
{
int index =0;
if(MaxSymbols<=num_ids) {
num_ids++;
return;
}
index = getIndex(yytext);
if(index!=-1){
strcpy(symbol[index], yytext);
num_ids++;
}
return 0;
}
/*
 * Find the slot where identifier str should be stored, using linear probing
 * from its hash code.
 *
 * Returns the index of the first free slot (first byte '*') on the probe
 * sequence, or -1 when str is already in the table. -1 is also returned if
 * every slot was probed without finding a free one, so the caller never
 * stores into an occupied slot.
 */
int getIndex(char *str)
{
    int index = getHashIndex(str);
    int probes;

    for (probes = 0; probes < MaxTableIndex; probes++) {
        if (0 == strcmp(symbol[index], str)) {
            return -1;                          /* already stored */
        }
        if (symbol[index][0] == '*') {
            return index;                       /* free slot */
        }
        index = (index + 1) % MaxTableIndex;    /* try the next slot, wrapping around */
    }
    return -1; /* table full */
}
/*
 * Compute the hash code of str: its bytes are read as digits of a base-10
 * number, reduced modulo MaxTableIndex.
 *
 * The reduction is applied after every byte, so the accumulator cannot
 * overflow however long the identifier is; for inputs that did not overflow
 * before, the result is unchanged. Bytes are read as unsigned char so the
 * result is never negative.
 *
 * Returns a value in the range 0 .. MaxTableIndex - 1.
 */
int getHashIndex(char *str)
{
    unsigned long hash = 0;
    size_t i;

    for (i = 0; str[i] != '\0'; i++) {
        hash = (hash * 10 + (unsigned char)str[i]) % MaxTableIndex;
    }
    return (int)hash;
}
/*
 * Allocate a zero-filled buffer for the contents of the quoted string str.
 *
 * The buffer holds strlen(str) - 1 bytes: the text without its two double
 * quotes plus one byte for '\0'. For input shorter than two characters a
 * single zero byte is allocated. The whole buffer is cleared.
 *
 * Returns the new buffer; the caller owns it. On allocation failure an error
 * is reported and the program exits with status 1.
 */
char *getMem(char *str)
{
    size_t len = strlen(str);
    size_t size = (len >= 2) ? len - 1 : 1;
    char *head = calloc(size, 1);

    if (head == NULL) {
        perror("\nvirtual memory exhausted!\n");
        exit(1);
    }
    return head;
}
/*
 * Append the string literal currently held in yytext to the list of literal
 * strings, stored without its surrounding double quotes.
 *
 * The text buffer comes from getMem(), which provides room for the unquoted
 * text plus the terminator; nothing is written beyond that. On allocation
 * failure of the list node an error is reported and the program exits with
 * status 1.
 *
 * Always returns 0.
 */
int saveString()
{
    size_t textLen = strlen(yytext);
    size_t bodyLen = (textLen >= 2) ? textLen - 2 : 0; /* length without the quotes */
    ls *node = malloc(sizeof *node);

    if (node == NULL) {
        perror("\nvirtual memory exhausted!\n");
        exit(1);
    }
    node->next = NULL;
    node->start = getMem(yytext);
    memcpy(node->start, yytext + 1, bodyLen);   /* skip the opening quote */
    node->start[bodyLen] = '\0';

    if (NULL == stringHead) {
        stringHead = stringTail = node;
    } else {
        stringTail->next = node;
        stringTail = node;
    }
    return 0;
}
/*
 * Print a numbered list of all stored literal strings, followed by the total
 * number of bytes they occupy (each string's length plus its terminator).
 * Always returns 0.
 */
int displayString()
{
    ls *node;
    int count = 0;
    int bytes = 0;

    printf("\n\nLiteral Strings:\n");
    printf("==================\n");
    for (node = stringHead; node; node = node->next) {
        count++;
        printf(" %i. ", count);
        printf(" %s \n", node->start);
        bytes += strlen(node->start) + 1;
    }
    printf("------------------\n");
    printf("There is %i bytes memories used.\n", bytes);
    return 0;
}
/*
 * Print every occupied slot of the symbol table, then the identifier count.
 *
 * A slot whose content is exactly "*" is free and is skipped. The limit shown
 * in the heading and used for the overflow check is MaxSymbols. The error is
 * reported only when more than MaxSymbols identifiers were counted, i.e. when
 * at least one identifier could not be stored.
 *
 * Always returns 0.
 */
int printSymbolTable()
{
    int i;

    printf("\nSymbol Table(Max symbols:%i)\n===========\n", MaxSymbols);
    for (i = 0; i < MaxTableIndex; i++) {
        if (0 != strcmp(symbol[i], "*")) {
            printf(" %s\t\n", symbol[i]);
        }
    }
    printf( "_______________________\n");
    printf("\nThere are %i identifiers totally\n\n", num_ids);
    if (MaxSymbols < num_ids) {
        printf("Error: Too many symbols! The maximum numbers of symbols is %i!\n", MaxSymbols);
    }
    return 0;
}
[/code:1]
下面是我写的一个测试程序源文件 test.c-, 这个只是用来测试是否正确抓到tokens,和parsing table是否生成正确
[code:1]
/*
** C-like COMPILER
** Author: Yinghao Qin
**
** This is the test C- language file --- multi-line comments
**
*/
// Scanner test input: declares char, int and float variables and uses
// printf, a for loop, a while loop with if/else and break, a switch with
// case/default, and a call to getMax.
int main()
{
//char type variable
char x='a';
int i= 1;
float average=0.0;
int result1=0,result2=0;
//display x
printf(" x :"+x);
//use 'for' loop calculating the sum from 1 to 1000
for( ; i<=1000; i++)
{
result1+=i;
}
//use 'while' loop calculating the sum from 1 to 1000
i = 1;
while(1)
{
// NOTE(review): the next statement has no terminating ';' - confirm whether that is intentional for this test input
result2+=i
if(1000==i) break;
else
i++;
}
//get the average value
average = (float)(result1/i);
// the two sums are compared by switching on their difference
switch (result1-result2)
{
case 0:
//note: printf function in C- is a little bit different from C
printf("Right, the average is " + average); break;
default:
printf("Wrong!");//this line never be executed
break;
}
printf("The Max is :" + getMax( average, result1));
return 0;
}
// Returns a when a is greater than b, otherwise b.
// NOTE(review): main calls getMax(average, result1) before this definition and
// passes the float variable average for the int parameter a - confirm C- allows both.
int getMax(int a, int b)
{
return a>b?a:b;
}
[/code:1] |
|