hey ppl,
for a project in coll i need to implement a simple lexical analyser in C....i did get a code but wen implemented it recognises EOF after one blank line...as d input n ouput are files...so if any idea as wat could be d reason??? n any codes to implement a simple lexical analyser....???

Recommended Answers

All 4 Replies

Post your code, then perhaps we can tell you where you went wrong.
Don't forget to use the tags around your code.

hey ppl,
for a project in coll i need to implement a simple lexical analyser in C....i did get a code but wen implemented it recognises EOF after one blank line...as d input n ouput are files... any idea as wat could be wrong ??? n any codes to implement a simple lexical analyser....???

here's d code

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
/*MACRO DEFINITIONS*/
#define MAX 50
#define keycount 31
#define opcount 13
#define symcount 16
#define logiccount 9
int line=1;
void is_operator_or_not(char*,int,int,FILE*);
/* KEYWORD TABLE */
/*This array has list of all the standard keywords*/
char key[MAX][MAX]=
{
"auto"
"double",
"int",
"struct",
"break",
"else",
"long",
"switch",
"case",
"enum",
"register",
"typedef",
"char",
"extern",
"return",
"union",
"const",
"float",
"short",
"unsigned",
"continue",
"for",
"signed",
"void",
"default",
"goto",
"sizeof",
"volatile",
"do",
"if",
"static",
"while"
};
/*OPERATOR TABLE*/
/*This array has list of all operators*/
char operatorkey[MAX][MAX]=
{
"+",
"-",
"*",
"/",
"~",
"!",
"=",
"%",
"^",
"&",
">",
"<",
"|"
};
 
/* SYMBOL TABLE */
/* This array has list of symbols used in the program */
char symbol[MAX][MAX]=
{
"{",
"}",
"[",
"]",
"'",
";",
":",
"`",
".",
"@",
"#",
"$",
"(",
")",
"\"",
","
};
/*LOGICAL OPERATOR TABLE */
char logic[MAX][MAX]=
{
"<=",
">=",
"==",
"!=",
"{}",
"[]",
"()",
"&&",
"||"
};
/*Function to check for an operator */
/*
This module is explicitly called by the break uo function
It will check whether the character in array *d[] is an operator or not
This will write the operatorand the token id onto the output file else
return back to the function breakup
*/
void b_operator(char *d,int i,FILE *fp)
{
int r;
/*here check whether the characters in *d[] is an operator or not*/
if(i!=opcount)
{
r=strcmp(operatorkey[i],d);
if(r==0)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,d,"OPERATOR",22);
return;
}
else
{
i++;
b_operator(d,i,fp);
}
}
else
return;
}
/*Function to check for special symbol*/
/*
This module is explicitly called by the break up function
It will check whether the character in the array *d[] is an
special symbol or not.This will write the special symbol
and tokenid onto the output file else return back to
the function breakup */
void b_special(char *d,int i,FILE *fp)
{
int r;
if(i!=symcount)
{
r=strcmp(symbol[i],d);
if(r==0)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,d,"SPECIALSYMBOL",33);
return;
}
else
{
i++;
b_special(d,i,fp);
}
}
else
{
i=0;
b_operator(d,i,fp);
return;
}
return;
}
/*Funtion tocheck for the keyword*/
/*
This module is explicitly called by the break up function
It will check whether the character in the array *d[] is an
keyword or not .This will write the keyword and the token
id onto the output file else return back to function
breakup*/
int b_keyword(char *d,FILE *fp)
{
int j=0;
int r;
while(j!=keycount)
{
r=strcmp(key[j],d);
if(r==0)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t\t%d\n\n\n",line,d,"KEYWORD",j);
return 1;
}
else
j++;
}
return -1;
}
/*Function to split the input array */
/*In this module we basically split the array into number of parts
For example:
if(a<=b) is split as "if","(","a","<","=","b",")"
And the tokens with the token numbers are written separately onto the
output file */
void t_break(FILE *fp,FILE *f3,char c)
{
int i,j=0,r,len=0;
char d[500];
if(c!=EOF)
{
if(!((c>='a'&&c<='z')||(c>='0'&&c<='9')))
{
j=0;
d[j++]=c;
d[j]='\0';
i=0;
b_special(d,i,fp);
c=getc(f3);
if(c>='0'&&c<='9')
{
j=0;
len=0;
d[j++]=c;
len++;
c=getc(f3);
while((c!='\0')&&((c>='0'&&c<='9')||(c=='.')))
{
d[j++]=c;
len++;
c=getc(f3);
}
if(c>='a'&&c<='z')
{
d[j++]=c;
c=getc(f3);
while(c!='\0')
{
if(!((c>='a'&&c<='z')&&(c>='0'&&c<='9')))
{
d[j++]=c;
c=getc(f3);
}
}
d[j]='\0';
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,d,"INVALID",-1);
return ;
}
j=0;
is_operator_or_not(d,j,len,fp);
}
if(!((c>='0'&&c<='9')&&(c>='a'&&c<='z')))
{
t_break(fp,f3,c);
return;
}
}
else
{
if(c>='a'&&c<='z')
{
j=0;
while((c>='a'&&c<='z')||(c>='0'&&c<='9'))
{
d[j++]=c;
c=getc(f3);
}
d[j]='\0';
r=b_keyword(d,fp);
if(r==-1)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,d,"IDENTIFIER",11);
}
}
if(!(c>='0'&&c<='9')||(c>='a'&&c<='z'))
{
t_break(fp,f3,c);
}
}
return;
}
}
/*Function to check for special symbol*/
/*In this module we check whether the array a* is a special symbol or not
If the character is a special symbol it is written onto the output file
else calls the function breakup to split the array into asequence of valid
tokens*/
void is_special_or_not(char *a,int j,int len,FILE *fp)
{
FILE *f3;
int i;
char c;
if(j!=symcount)
{
int r;
r=strcmp(symbol[j],a);
if(r==0)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,a,"SPECIAL SYMBOL",33);
return;
}
else
{
j++;
is_special_or_not(a,j,len,fp);
return;
}
}
else
{
f3=fopen("temp1.txt","w");
for(i=0;i<=len;i++)
{
c=a[i];
putc(c,f3);
}
fclose(f3);
f3=fopen("temp1.txt","r");
if(f3==NULL)
{
printf("error in opening file \n");
exit(1);
}
c=getc(f3);
t_break(fp,f3,c);
fclose(f3);
return;
}
return;
}
 
/*to check for character constant*/
/*
This module will check whether the string in the array *a[] is a character
constant or not.If it true will print the token idonto the output file
Else calls the function to check whether it is a special symbol or not
*/
void is_char_or_not(char *a,int len,FILE *fp)
{
char c,e,d[25];
int i=0,j=0;
c=a[i++];
if(c=='\'')
{
c=a[i++];
if(c=='\\')
{
c=a[i++];
if(c>='a'&&c<='z')
{
d[j++]=c;
c=a[i++];
if((c>='a'&&c<='z')||(c=='\''))
{
e=c;
c=a[i++];
if(c=='\0')
{
d[j++]=c;
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,d,"CHARCONST",44);
return;
}
if(c=='\'')
{
d[j++]=e;
c=a[i++];
if(c=='\0')
{
d[j]='\0';
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,d,"CHAR CONST",44);
return;
}
}
}
}
}
}
j=0;
is_special_or_not(a,j,len,fp);
return;
}
 
/*Function to check for string constant*/
/*
This module will check whether the array *a is a valid string constant or
not.It checks for all the characters enclosed within double quotes and
prints that to output file along with token id which user defined
IF it is not a string then it calls the function is_a_Char_or_not
*/
void is_string_or_not(char *a,int len,FILE *fp)
{
char d[25];
char c;
int i=0,j=0;
c=a[i++];
if(c=='"')
{
c=a[i++];
while(c>='a'&&c<='z')
{
d[j++]=c;
c=a[j++];
}
if(c=='"')
{
d[j]='\0';
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,d,"STRING CONST",55);
return;
}
else
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,a,"INVALID",-1);
return;
}
}
else
{
is_char_or_not(a,len,fp);
return;
}
}
 
/*Function to check logical operators*/
/*
This modules checks for the character in the array *a[] whether it is an
logical operator or not,if it is logical operator it is written on to the
output file else it calls the function checkstring to continue with the
token recognition
*/
void is_logical_or_not(char *a,int j,int len,FILE *fp)
{
while(j!=logiccount)
{
int r;
r=strcmp(logic[j],a);
if(r==0)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,a,"LOGICAL OPR",66);
return;
}
else
{
j++;
}
}
j=0;
is_string_or_not(a,len,fp);
return;
}
/*function to check for integer and floating point constants*/
/*
This modules checks for the character in the array *a[] whether it is an
integer or floating point number or not,if it is an integer then function
returns 0 and it is written on to the output file else it returns back to
the calling program with no return value
*/
int is_float_or_not(char *a,FILE *f2)
{
int x=0,len=0;
char c;
c=getc(f2);
while(c!='\0'&&c>='0'&&c<='9')
{
a[x++]=c;
len++;
c=getc(f2);
}
if(c=='\0')
{
a[x]=c;
return 0;
}
if(c=='.')
{
a[x++]=c;
c=getc(f2);
while(c!='\0'&&c>='0'&&c<='9')
{
a[x++]=c;
c=getc(f2);
}
if(c=='\0')
{
a[x]=c;
return 1;
}
}
if(!(c=='.'&& c>='0'&&c<='9'))
{
while(c!=EOF)
{
a[x++]=c;
len++;
c=getc(f2);
}
a[x]='\0';
return -1;
}
return 4;
}
 
/*function to check for an operator*/
/*
This modules checks for the character in the array *a[] whether it is an
operator or not,if it is operator it is writen on to the output file else
it will call for nextmodule to identify the token
*/
void is_operator_or_not(char *a,int j,int len,FILE *fp)
{
int i=0,b,r;
char c;
int d=1000;
FILE *f2;
while(j!=opcount)
{
r=strcmp(operatorkey[j],a);
if(r==0)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,a,"OPERATOR",d++);
return;
}
else
j++;
}
f2=fopen("temp.txt","w");
for(i=0;i<=len;i++)
{
c=a[i];
putc(c,f2);
}
fclose(f2);
f2=fopen("temp.txt","r");
b=is_float_or_not(a,f2);
if(b==0)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,a,"INTEGER CONST",77);
return;
}
if(b==1)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t%d\n\n\n",line,a,"FLOATING",88);
return;
}
else
{
j=0;
is_logical_or_not(a,j,len,fp);
return;
}
fclose(fp);
return;
}
 
 
 
 
/*function to check for a keyword*/
/*
This modules checks for the character in the array *a[] whether it is an
keyword or not,if it is operator it is written on to the output file else
it call for next module to identify the token
*/
 
void is_keyword_or_not(char *a,int j,int len,FILE *fp)
{
while(j!=keycount)
{
int r;
r=strcmp(key[j],a);
if(r==0)
{
fprintf(fp,"%d\t\t%s\t\t%s\t\t\t%d\n\n\n",line,a,"KEYWORD",++j);
return;
}
else
j++;
}
j=0;
is_operator_or_not(a,j,len,fp);
return;
}
/*main function*/
/* in the main function the function scans for all the characters until it encounters a blank character or a new line.
Once it encounter either of them the array is then passed for yhe token recognition.
Here in the function main the function checks until in encounters EOF the tokens are scanned.Basically we are
buffering the characters onto an array which is passed to number of modules.Here the function the function main is actual
lex.It eliminates the comment lines and white spaces.Every time we encounter a new line number is incremented.
*/
 
int main()
{
FILE *f1,*fp;
int c,i=0,j=0,len=0;
char buff[25],tem[25];
fp=fopen("out.txt","w");
fprintf(fp,"\n-------------------------------------------------------------------------------------------\n");
fprintf(fp,"%s\t\t%s\t\t%s\t\t%s\n","LINE NO","TOKEN","TOKEN TYPE","TOKEN TYPE","TOKENVALUE");
fprintf(fp,"\n--------------------------------------------------------------------------------------------\n\n\n");
f1=fopen("input.txt","r");
if(f1==NULL)
{
printf("cannot open file \n");
exit(1);
}
while(!feof(f1))
{
while(c!='\n'&&c!=' ')
{
if(c=='/')
{
c=getc(f1);
if(c=='*')
{
c=getc(f1);
i=0;
while(c!='*')
{
if(c=='\n')
{
line++;
}
tem[i++]=c;
c=getc(f1);
}
if(c=='*')
{
c=getc(f1);
if(c=='/')
{
tem[i]='\0';
}
}
}
}
else
{
buff[i++]=c;
len++;
c=getc(f1);
}
}
buff[i]='\0';
is_keyword_or_not(buff,j,len,fp);
j=0;
i=0;
len=0;
if(c=='\n')
line++;
}
fclose(f1);
fclose(fp);
return 0;
}
Member Avatar for iamthwee

You've gotta be joking. Please don't tell me you code without indentation?

Member Avatar for iamthwee

eh I don't

Well then you should.

i.e

if ( something)
{
    if (something else)
   {
    //do stuff
    }
}

Note how all the braces are aligned. Try this then post back please.

Oh and give an example of what your input file looks like

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.