I have a project to construct a compiler using the C++ language,
but I don't know how to build the lexical, syntax, and semantic phases, or how to generate errors.
Can anyone help me?

Well, yes, though this sort of project is really a great deal more complex than is likely to be easily resolved in a single post, or even an entire thread of posts. This thread on Dev Shed is probably still a suitable starting place, especially the second post in it, even if some of the links are stale; you might also want to see the Compiler Development Wiki, though that is frequently offline. Finally, you may want to look at some existing compiler projects, such as my own Suntiger Algol compiler (it is written in Python, rather than C++, and the design is a bit unusual, but it should be clear enough to give some idea of how it works).

Can you tell us what you already know about, say, lexical analysis, and show us what (if anything) you've done so far?

Edited 2 Years Ago by Schol-R-LEA

Bold Text Here

    #include<iostream.h>
    #include<conio.h>
    #include<fstream.h>
    #include<stdio.h>
    #include<string.h>
    // Reads d:\show.txt one line at a time and prints a table with one row per
    // input line. Each line is classified by its leading characters ('#',
    // "void", '{', "int", "char", "float", '}', or anything else) and selected
    // characters of the line are echoed under the columns printed by the
    // header loop: lineno, keyword, eof, space, braces, variable, operator,
    // value.
    void main()
    {
    clrscr();
    // Character sets used by the membership-test loops below.
    char abc[100]="include";
    char ghi[100]="#<.>=+";
    char aa[100]=" ";
    char def[100]="abcdefghijklmnopqrstuvwxyz";
    // a[row][column] table of C-string pointers. Row 0 receives the column
    // titles; later rows are indexed by the line counter m. Only 10 rows
    // exist, so m >= 10 indexes past the end of the array.
    char *a[10][10];
    char bb[100]="(){}";
    char mo[100]="1234567890abcdefghijklmnopqrstuvwxyz";
    char lo[100]="1234567890.";
    // NOTE(review): p points at a string literal and every `*p=...` below
    // writes through it; modifying a string literal is undefined behavior in
    // standard C++. Every `a[m][k]=p` stores this same pointer, so those table
    // cells all alias the one character that p points to.
    char *p="0";
    int len;
    char str[100];
    int i,j;
    int m,n;
    // Print the header row. The outer loop runs exactly once with m==0, so the
    // `if(m==0)` test is always true; n takes the values 0..7, one per column.
    for(m=0;m<1;m++)
            {
              for(n=0;n<8;n++)
                {
                 if(m==0)
                {
    if(n==0)
    {
    a[m][n]="lineno";
    cout<<a[m][n];
    }
    else if(n==1)
    {
    cout<<"      ";
    a[m][n]="keyword";
    cout<<a[m][n];
    }
    else if(n==2)
    {
    cout<<"    ";
    a[m][n]="eof";
    cout<<a[m][n];
    }
    else if(n==3)
    {
    cout<<"    ";
    a[m][n]="space";
    cout<<a[m][n];
    }
    else if(n==4)
    {
    cout<<"  ";
    a[m][n]="braces";
    cout<<a[m][n];
    }
    else if(n==5)
    {
    cout<<"  ";
    a[m][n]="variable";
    cout<<a[m][n];
    }
    else if(n==6)
    {
    cout<<"  ";
    a[m][n]="operator";
    cout<<a[m][n];
    }
    else if(n==7)
    {
    cout<<"  ";
    a[m][n]="value";
    cout<<a[m][n];
    }
    else
    {
    // Not reached: n is always in 0..7 inside this loop.
    getch();
    }
    } }}
    cout<<endl;
    // From here on m is the 1-based number of the input line being processed.
    m=1;
    ifstream ifo("d:\\show.txt");
    // NOTE(review): only eof() is tested, and the stream state after getline()
    // is never checked, so a read that fails (file not opened, a line of 100+
    // characters, or the final read at end of file) is still processed as if
    // it had produced a line.
      while(!ifo.eof())
        {
        ifo.getline(str,100);
        // --- Line starting with '#' ---
        if(str[0]=='#')
          {
          cout<<m<<"      ";
          // Echo each of str[0..7] that occurs in abc ("include").
          for(int i=0;i<8;i++)
        {
        for(int j=0;j<7;j++)
          {
          if(str[i]==abc[j])
            {
            *p=str[i];
            a[m][1]=p;
            cout<<a[m][1];
            }
          }
        }
        // Echo each of str[8..19] that occurs in def[0..24] (letters a-y; the
        // bound j<25 stops before 'z'). Indices up to 19 are read regardless
        // of how long the current line actually is.
        for(i=8;i<20;i++)
          {
          for(j=0;j<25;j++)
            {
            if(str[i]==def[j])
              {
              *p=str[i];
              a[m][1]=p;
              cout<<a[m][1];
              }
            }
          }
          cout<<"                                  ";

              // Echo each of str[0..19] that occurs in ghi ("#<.>=+").
              for(i=0;i<20;i++)
               {
               for(j=0;j<6;j++)
            {
             if(str[i]==ghi[j])
              {
              *p=str[i];
              a[m][2]=p;
              cout<<a[m][2];
              }
             }
               }
               cout<<endl;
              }
       // --- Line starting with "void" ---
       else if(str[0]=='v'&&str[1]=='o'&&str[2]=='i'&&str[3]=='d')
          {
          cout<<m<<"       ";
          // Echo the letters (a-y) found in str[0..3] ...
          for(i=0;i<4;i++)
        {
        for(j=0;j<25;j++)
          {
          if(str[i]==def[j])
            {
            *p=str[i];
            a[m][1]=p;
            cout<<a[m][1];

            }
          }
        }

        // ... and then those found in str[5..10]; index 4 is skipped.
        for(i=5;i<11;i++)
        {
        for(j=0;j<25;j++)
          {
          if(str[i]==def[j])
            {
            *p=str[i];
            a[m][1]=p;
            cout<<a[m][1];

            }
          }
        }
        cout<<"                ";
          // Each space in str[0..10] is overwritten with '1' in str itself and
          // that '1' is printed.
          for(i=0;i<11;i++)
        {
        for(j=0;j<1;j++)
          {
          if(str[i]==aa[j])
            {
            str[i]='1';
            *p=str[i];
            a[m][3]=p;
            cout<<a[m][3];

            }
          }
        }
        cout<<"      ";
           // Echo each of str[0..10] that occurs in bb ("(){}").
           for(i=0;i<11;i++)
        {
        for(j=0;j<4;j++)
          {
          if(str[i]==bb[j])
            {
            *p=str[i];
            a[m][4]=p;
            cout<<a[m][4];
            }
          }
        }
        cout<<endl;
          }
          // --- Line starting with '{' : print the brace after fixed padding ---
          else if(str[0]=='{')
          {
          cout<<m<<"     ";
          cout<<"                                 ";
          for(j=0;j<4;j++)
            {
            if(str[0]==bb[j])
              {
              *p=str[0];
              a[m][4]=p;
              cout<<a[m][4];
              }
            }
            cout<<endl;
          }
          // --- Line starting with "int" ---
          else if(str[0]=='i'&&str[1]=='n'&&str[2]=='t')
          {
          cout<<m<<"           ";
          len=strlen(str);
          // Echo the letters (a-y) found in str[0..2].
          for(i=0;i<3;i++)
          {
          for(j=0;j<25;j++)
            {
            if(str[i]==def[j])
              {
              *p=str[i];
              a[m][1]=p;
              cout<<a[m][1];
              }
            }
          }
          // Print the last character of the line.
          *p=str[len-1];
           a[m][2]=p;
           cout<<"         ";
           cout<<a[m][2];
        // a[m][3] is set to p and then immediately replaced by the literal "1",
        // so "1" is what gets printed here.
        *p=str[3];
           a[m][3]=p;
           cout<<"       ";
           a[m][3]="1";
           cout<<a[m][3];
           // Print the single character at index 4.
           *p=str[4];
           a[m][5]=p;
           cout<<"               ";
           cout<<a[m][5];
           // For lines longer than 6 characters: print the character at index 5,
           // then every digit (lo[0..9]) found in str[0..len-2].
           if(len>6)
           {
           cout<<"         ";
           *p=str[5];
           a[m][6]=p;
           cout<<a[m][6];
           cout<<"       ";
           for(int i=0;i<len-1;i++)
           {
           for(int j=0;j<10;j++)
           {
           if(str[i]==lo[j])
           {
           *p=str[i];
           a[m][7]=p;
           cout<<a[m][7];
           }
           }
           }}
           cout<<endl;
          }
          // --- Line starting with "char" : same layout as the "int" branch with
          // every index shifted by one ---
          else if(str[0]=='c'&&str[1]=='h'&&str[2]=='a'&&str[3]=='r')
          {
          cout<<m<<"           ";
          len=strlen(str);
          // Echo the letters (a-z) found in str[0..4].
          for(i=0;i<5;i++)
          {
          for(j=0;j<26;j++)
            {
            if(str[i]==def[j])
              {
              *p=str[i];
              a[m][1]=p;
              cout<<a[m][1];
              }
            }
          }
          // Print the last character of the line.
          *p=str[len-1];
           a[m][2]=p;
           cout<<"        ";
           cout<<a[m][2];
           // As in the "int" branch, the literal "1" replaces p before printing.
           *p=str[4];
           a[m][3]=p;
           cout<<"       ";
           a[m][3]="1";
           cout<<a[m][3];
           cout<<"               ";
           // Print the single character at index 5.
           *p=str[5];
           a[m][5]=p;
           cout<<a[m][5];
           // For lines longer than 7 characters: print the character at index 6,
           // then every digit or lowercase letter (mo) found in str[7..len-2].
           if(len>7)
           {
           cout<<"         ";
           *p=str[6];
           a[m][6]=p;
           cout<<a[m][6];
           cout<<"       ";
           for(int i=7;i<len-1;i++)
           {
           for(int j=0;j<36;j++)
           {
           if(str[i]==mo[j])
           {
           *p=str[i];
           a[m][7]=p;
           cout<<a[m][7];
           }
           }
           }}
           cout<<endl;
          }
           // --- Line starting with "float" : same layout again, indices shifted
           // by one more ---
           else if(str[0]=='f'&&str[1]=='l'&&str[2]=='o'&&str[3]=='a'&&str[4]=='t')
          {
          cout<<m<<"           ";
          len=strlen(str);
          // Echo the letters (a-z) found in str[0..5].
          for(i=0;i<6;i++)
          {
          for(j=0;j<26;j++)
            {
            if(str[i]==def[j])
              {
              *p=str[i];
              a[m][1]=p;
              cout<<a[m][1];
              }
            }
          }
          // Print the last character of the line.
          *p=str[len-1];
           a[m][2]=p;
           cout<<"       ";
           cout<<a[m][2];
           // As above, the literal "1" replaces p before printing.
           *p=str[5];
           a[m][3]=p;
           cout<<"       ";
           a[m][3]="1";
           cout<<a[m][3];
           cout<<"               ";
           // Print the single character at index 6.
           *p=str[6];
           a[m][5]=p;
           cout<<a[m][5];
           // For lines longer than 8 characters: print the character at index 7,
           // then every character of str[7..len-2] that occurs in lo (digits
           // and '.'). The scan starts at index 7 again. The bound j<36 runs
           // past the 11 characters stored in lo; the remaining elements of the
           // array are zero and cannot equal a character before the terminator.
           if(len>8)
           {
           cout<<"         ";
           *p=str[7];
           a[m][6]=p;
           cout<<a[m][6];
           cout<<"       ";
           for(int i=7;i<len-1;i++)
           {
           for(int j=0;j<36;j++)
           {
           if(str[i]==lo[j])
           {
           *p=str[i];
           a[m][7]=p;
           cout<<a[m][7];
           }
           }
           }}
           cout<<endl;
          }
          // --- Line starting with '}' : print the brace after fixed padding ---
          else if(str[0]=='}')
          {
          cout<<m<<"     ";
          cout<<"                                 ";
          for(j=0;j<4;j++)
            {
            if(str[0]==bb[j])
              {
              *p=str[0];
              a[m][4]=p;
              cout<<a[m][4];
              }
            }
            cout<<endl;
          }
          // --- Any other line ---
          else
          {
          len=strlen(str);
          cout<<m<<"                ";
          // Print the last character of the line.
          // NOTE(review): for an empty line len is 0 and str[len-1] reads
          // str[-1], which is outside the buffer.
          *p=str[len-1];
           a[m][2]=p;
           cout<<"       ";
           cout<<a[m][2];
          cout<<"                      ";
          // Echo every lowercase letter of the line ...
          for(i=0;i<len;i++)
          {
          for(j=0;j<26;j++)
          {
          if(str[i]==def[j])
          {
          *p=str[i];
          a[m][5]=p;
          cout<<a[m][5];
          }
          }
          }
          cout<<"        ";
          // ... and then every character of the line that occurs in ghi.
          for(i=0;i<len;i++)
          {
          for(j=0;j<6;j++)
          {
          if(str[i]==ghi[j])
          {
          *p=str[i];
          a[m][6]=p;
          cout<<a[m][6];
          }
          }
          }
          cout<<endl;
          }
          // Advance to the next table row / line number (no upper bound check).
          m++;
        }
        ifo.close();
        getch();
    }






    #include<conio.h>
    #include<fstream.h>
    #include<stdio.h>
    #include<string.h>
    // Reads d:\show.txt one line at a time and prints a table with one row per
    // input line. Each line is classified by its leading characters ('#',
    // "void", '{', "int", "char", "float", '}', or anything else) and selected
    // characters of the line are echoed under the columns printed by the
    // header loop: lineno, keyword, eof, space, braces, variable, operator,
    // value.
    void main()
    {
    clrscr();
    // Character sets used by the membership-test loops below.
    char abc[100]="include";
    char ghi[100]="#<.>=+";
    char aa[100]=" ";
    char def[100]="abcdefghijklmnopqrstuvwxyz";
    // a[row][column] table of C-string pointers. Row 0 receives the column
    // titles; later rows are indexed by the line counter m. Only 10 rows
    // exist, so m >= 10 indexes past the end of the array.
    char *a[10][10];
    char bb[100]="(){}";
    char mo[100]="1234567890abcdefghijklmnopqrstuvwxyz";
    char lo[100]="1234567890.";
    // NOTE(review): p points at a string literal and every `*p=...` below
    // writes through it; modifying a string literal is undefined behavior in
    // standard C++. Every `a[m][k]=p` stores this same pointer, so those table
    // cells all alias the one character that p points to.
    char *p="0";
    int len;
    char str[100];
    int i,j;
    int m,n;
    // Print the header row. The outer loop runs exactly once with m==0, so the
    // `if(m==0)` test is always true; n takes the values 0..7, one per column.
    for(m=0;m<1;m++)
            {
              for(n=0;n<8;n++)
                {
                 if(m==0)
                {
    if(n==0)
    {
    a[m][n]="lineno";
    cout<<a[m][n];
    }
    else if(n==1)
    {
    cout<<"      ";
    a[m][n]="keyword";
    cout<<a[m][n];
    }
    else if(n==2)
    {
    cout<<"    ";
    a[m][n]="eof";
    cout<<a[m][n];
    }
    else if(n==3)
    {
    cout<<"    ";
    a[m][n]="space";
    cout<<a[m][n];
    }
    else if(n==4)
    {
    cout<<"  ";
    a[m][n]="braces";
    cout<<a[m][n];
    }
    else if(n==5)
    {
    cout<<"  ";
    a[m][n]="variable";
    cout<<a[m][n];
    }
    else if(n==6)
    {
    cout<<"  ";
    a[m][n]="operator";
    cout<<a[m][n];
    }
    else if(n==7)
    {
    cout<<"  ";
    a[m][n]="value";
    cout<<a[m][n];
    }
    else
    {
    // Not reached: n is always in 0..7 inside this loop.
    getch();
    }
    } }}
    cout<<endl;
    // From here on m is the 1-based number of the input line being processed.
    m=1;
    ifstream ifo("d:\\show.txt");
    // NOTE(review): only eof() is tested, and the stream state after getline()
    // is never checked, so a read that fails (file not opened, a line of 100+
    // characters, or the final read at end of file) is still processed as if
    // it had produced a line.
      while(!ifo.eof())
        {
        ifo.getline(str,100);
        // --- Line starting with '#' ---
        if(str[0]=='#')
          {
          cout<<m<<"      ";
          // Echo each of str[0..7] that occurs in abc ("include").
          for(int i=0;i<8;i++)
        {
        for(int j=0;j<7;j++)
          {
          if(str[i]==abc[j])
            {
            *p=str[i];
            a[m][1]=p;
            cout<<a[m][1];
            }
          }
        }
        // Echo each of str[8..19] that occurs in def[0..24] (letters a-y; the
        // bound j<25 stops before 'z'). Indices up to 19 are read regardless
        // of how long the current line actually is.
        for(i=8;i<20;i++)
          {
          for(j=0;j<25;j++)
            {
            if(str[i]==def[j])
              {
              *p=str[i];
              a[m][1]=p;
              cout<<a[m][1];
              }
            }
          }
          cout<<"                                  ";

              // Echo each of str[0..19] that occurs in ghi ("#<.>=+").
              for(i=0;i<20;i++)
               {
               for(j=0;j<6;j++)
            {
             if(str[i]==ghi[j])
              {
              *p=str[i];
              a[m][2]=p;
              cout<<a[m][2];
              }
             }
               }
               cout<<endl;
              }
       // --- Line starting with "void" ---
       else if(str[0]=='v'&&str[1]=='o'&&str[2]=='i'&&str[3]=='d')
          {
          cout<<m<<"       ";
          // Echo the letters (a-y) found in str[0..3] ...
          for(i=0;i<4;i++)
        {
        for(j=0;j<25;j++)
          {
          if(str[i]==def[j])
            {
            *p=str[i];
            a[m][1]=p;
            cout<<a[m][1];

            }
          }
        }

        // ... and then those found in str[5..10]; index 4 is skipped.
        for(i=5;i<11;i++)
        {
        for(j=0;j<25;j++)
          {
          if(str[i]==def[j])
            {
            *p=str[i];
            a[m][1]=p;
            cout<<a[m][1];

            }
          }
        }
        cout<<"                ";
          // Each space in str[0..10] is overwritten with '1' in str itself and
          // that '1' is printed.
          for(i=0;i<11;i++)
        {
        for(j=0;j<1;j++)
          {
          if(str[i]==aa[j])
            {
            str[i]='1';
            *p=str[i];
            a[m][3]=p;
            cout<<a[m][3];

            }
          }
        }
        cout<<"      ";
           // Echo each of str[0..10] that occurs in bb ("(){}").
           for(i=0;i<11;i++)
        {
        for(j=0;j<4;j++)
          {
          if(str[i]==bb[j])
            {
            *p=str[i];
            a[m][4]=p;
            cout<<a[m][4];
            }
          }
        }
        cout<<endl;
          }
          // --- Line starting with '{' : print the brace after fixed padding ---
          else if(str[0]=='{')
          {
          cout<<m<<"     ";
          cout<<"                                 ";
          for(j=0;j<4;j++)
            {
            if(str[0]==bb[j])
              {
              *p=str[0];
              a[m][4]=p;
              cout<<a[m][4];
              }
            }
            cout<<endl;
          }
          // --- Line starting with "int" ---
          else if(str[0]=='i'&&str[1]=='n'&&str[2]=='t')
          {
          cout<<m<<"           ";
          len=strlen(str);
          // Echo the letters (a-y) found in str[0..2].
          for(i=0;i<3;i++)
          {
          for(j=0;j<25;j++)
            {
            if(str[i]==def[j])
              {
              *p=str[i];
              a[m][1]=p;
              cout<<a[m][1];
              }
            }
          }
          // Print the last character of the line.
          *p=str[len-1];
           a[m][2]=p;
           cout<<"         ";
           cout<<a[m][2];
        // a[m][3] is set to p and then immediately replaced by the literal "1",
        // so "1" is what gets printed here.
        *p=str[3];
           a[m][3]=p;
           cout<<"       ";
           a[m][3]="1";
           cout<<a[m][3];
           // Print the single character at index 4.
           *p=str[4];
           a[m][5]=p;
           cout<<"               ";
           cout<<a[m][5];
           // For lines longer than 6 characters: print the character at index 5,
           // then every digit (lo[0..9]) found in str[0..len-2].
           if(len>6)
           {
           cout<<"         ";
           *p=str[5];
           a[m][6]=p;
           cout<<a[m][6];
           cout<<"       ";
           for(int i=0;i<len-1;i++)
           {
           for(int j=0;j<10;j++)
           {
           if(str[i]==lo[j])
           {
           *p=str[i];
           a[m][7]=p;
           cout<<a[m][7];
           }
           }
           }}
           cout<<endl;
          }
          // --- Line starting with "char" : same layout as the "int" branch with
          // every index shifted by one ---
          else if(str[0]=='c'&&str[1]=='h'&&str[2]=='a'&&str[3]=='r')
          {
          cout<<m<<"           ";
          len=strlen(str);
          // Echo the letters (a-z) found in str[0..4].
          for(i=0;i<5;i++)
          {
          for(j=0;j<26;j++)
            {
            if(str[i]==def[j])
              {
              *p=str[i];
              a[m][1]=p;
              cout<<a[m][1];
              }
            }
          }
          // Print the last character of the line.
          *p=str[len-1];
           a[m][2]=p;
           cout<<"        ";
           cout<<a[m][2];
           // As in the "int" branch, the literal "1" replaces p before printing.
           *p=str[4];
           a[m][3]=p;
           cout<<"       ";
           a[m][3]="1";
           cout<<a[m][3];
           cout<<"               ";
           // Print the single character at index 5.
           *p=str[5];
           a[m][5]=p;
           cout<<a[m][5];
           // For lines longer than 7 characters: print the character at index 6,
           // then every digit or lowercase letter (mo) found in str[7..len-2].
           if(len>7)
           {
           cout<<"         ";
           *p=str[6];
           a[m][6]=p;
           cout<<a[m][6];
           cout<<"       ";
           for(int i=7;i<len-1;i++)
           {
           for(int j=0;j<36;j++)
           {
           if(str[i]==mo[j])
           {
           *p=str[i];
           a[m][7]=p;
           cout<<a[m][7];
           }
           }
           }}
           cout<<endl;
          }
           // --- Line starting with "float" : same layout again, indices shifted
           // by one more ---
           else if(str[0]=='f'&&str[1]=='l'&&str[2]=='o'&&str[3]=='a'&&str[4]=='t')
          {
          cout<<m<<"           ";
          len=strlen(str);
          // Echo the letters (a-z) found in str[0..5].
          for(i=0;i<6;i++)
          {
          for(j=0;j<26;j++)
            {
            if(str[i]==def[j])
              {
              *p=str[i];
              a[m][1]=p;
              cout<<a[m][1];
              }
            }
          }
          // Print the last character of the line.
          *p=str[len-1];
           a[m][2]=p;
           cout<<"       ";
           cout<<a[m][2];
           // As above, the literal "1" replaces p before printing.
           *p=str[5];
           a[m][3]=p;
           cout<<"       ";
           a[m][3]="1";
           cout<<a[m][3];
           cout<<"               ";
           // Print the single character at index 6.
           *p=str[6];
           a[m][5]=p;
           cout<<a[m][5];
           // For lines longer than 8 characters: print the character at index 7,
           // then every character of str[7..len-2] that occurs in lo (digits
           // and '.'). The scan starts at index 7 again. The bound j<36 runs
           // past the 11 characters stored in lo; the remaining elements of the
           // array are zero and cannot equal a character before the terminator.
           if(len>8)
           {
           cout<<"         ";
           *p=str[7];
           a[m][6]=p;
           cout<<a[m][6];
           cout<<"       ";
           for(int i=7;i<len-1;i++)
           {
           for(int j=0;j<36;j++)
           {
           if(str[i]==lo[j])
           {
           *p=str[i];
           a[m][7]=p;
           cout<<a[m][7];
           }
           }
           }}
           cout<<endl;
          }
          // --- Line starting with '}' : print the brace after fixed padding ---
          else if(str[0]=='}')
          {
          cout<<m<<"     ";
          cout<<"                                 ";
          for(j=0;j<4;j++)
            {
            if(str[0]==bb[j])
              {
              *p=str[0];
              a[m][4]=p;
              cout<<a[m][4];
              }
            }
            cout<<endl;
          }
          // --- Any other line ---
          else
          {
          len=strlen(str);
          cout<<m<<"                ";
          // Print the last character of the line.
          // NOTE(review): for an empty line len is 0 and str[len-1] reads
          // str[-1], which is outside the buffer.
          *p=str[len-1];
           a[m][2]=p;
           cout<<"       ";
           cout<<a[m][2];
          cout<<"                      ";
          // Echo every lowercase letter of the line ...
          for(i=0;i<len;i++)
          {
          for(j=0;j<26;j++)
          {
          if(str[i]==def[j])
          {
          *p=str[i];
          a[m][5]=p;
          cout<<a[m][5];
          }
          }
          }
          cout<<"        ";
          // ... and then every character of the line that occurs in ghi.
          for(i=0;i<len;i++)
          {
          for(j=0;j<6;j++)
          {
          if(str[i]==ghi[j])
          {
          *p=str[i];
          a[m][6]=p;
          cout<<a[m][6];
          }
          }
          }
          cout<<endl;
          }
          // Advance to the next table row / line number (no upper bound check).
          m++;
        }
        ifo.close();
        getch();
    }

I have made the above program, which tokenizes the code on the basis of keywords, operators, EOL, etc.
After this, I want my code to check for lexical errors, i.e. if there is any mistake in the spelling of a valid identifier, then the code should generate an error for it.

OK, let's start with a few issues in your code. First off, main() should always be declared as type int in C++; there are some exceptions to this, but as a rule, void main() is simply incorrect.

Second, you seem to be using an older style of C++ header; with modern C++ (that is, after 1998), the standard library headers should not end with an .h, as they do in C, and the C library headers should be prefixed with a c. Thus, you should have the following directives instead:

#include <iostream>
#include <fstream>
#include <cstdio>
#include <cstring>

Note that I removed the reference to <conio.h>; this is not a standard header, but one specific to the older DOS versions of Turbo C++, and should never be used in any new code for any reason. Since you don't seem to actually use it, except to capture the screen, it is safe to simply eliminate this.

I'm guessing that you are using Turbo C++, correct? If you have any choice in the matter, don't. It is now more than 25 years old, and is tied to functions of the DOS operating system which modern Windows systems no longer support (especially with Vista and later). By using Turbo C++, you are shooting yourself in the foot. I know that some schools have standardized on Turbo C++, but I will tell you this right now: you are learning a version of C++ that is nearly 15 years out of date. Get Code::Blocks or Visual Studio Express if you can, and avoid a lot of headaches later.

As for the code itself, I have to agree with NathanOliver that it is extremely difficult to understand your code. This is not the sort of program you can write flat-out in a single function and expect it to be readable and workable. You should definitely be more careful about indenting your code in a legible fashion, too; the indentation starts to break down around the third nested level, and never really gets clear after that.

The first thing I would do is try to put the common code into functions, so that you aren't repeating the same thing over and over again. I would move most of the keyword constants into a table of token structures, like so:

// Categories a Token can be tagged with. NONE is the first enumerator and
// therefore has the value 0.
enum TOKTYPE {
    NONE,
    STR_LITERAL,
    NUM_LITERAL,
    IDENTIFIER,
    KEYWORD
};

// One entry of the token table: the token's text and its category.
struct Token {
    std::string token;  // text of the token
    TOKTYPE type;       // one of the TOKTYPE categories
};

// Growable table of Token records.
std::vector<Token> tokens;

This is just a starting point; you'll probably want to define Token as a class, rather than a POD struct.

Edited 2 Years Ago by Schol-R-LEA

@merry gold
One suggestion:
Read the Deitels' book; it's totally worth it.
(Programming these days is not just about making a program; it's about making a readable, extensible program.)
@Schol-R-LEA
Thank you so much for the tips, but one question: what is the replacement for getch()?

Edited 2 Years Ago by ArashVenus

ArashVenus: There really isn't any exact replacement for getch(), for the same reason why there wasn't a standard equivalent of getch() to begin with: because 'raw' or unbuffered console I/O isn't portable across different operating systems. The C standard library, and the C++ iostream classes, work exclusively with stream I/O, which does not allow for unbuffered keyboard input. As far as the stream I/O functions are concerned, it is irrelevant whether the input comes from a console, a file, a pipe, or a network socket. While this is a useful abstraction, it makes writing screen manipulation routines harder. The closest equivalent to the conio functions that is even remotely portable would be Curses or its variants, and even then it is mostly aimed at Unix-like systems.

That having been said, the most common use for getch() was to pause the console at the end of the program, so that the window doesn't close. This isn't needed in more recent IDEs like Code::Blocks or the current version of Visual Studio, as they automatically pause the console window when the program is done.

Edited 2 Years Ago by Schol-R-LEA

This article has been dead for over six months. Start a new discussion instead.