Note: There are three files that work together when combined into a single project.

Most of the code for the Tokenizer class is already provided. You are to complete the routine Tokenizer::getTokens. The routine currently handles single entry, single range start and single range end. Your job is to put in the code that will handle the subrange start and subrange end cases. The place where your code goes is indicated by a comment.

Compile and execute the project once you have put in your code. Look at the output generated by the main program to make sure everything is working properly.


------------------------------------------------------------------------------------------------------------------------------------------

//main.cpp


#include <iostream>
#include <stdlib.h>
#include "Tokenizer.h"

// Test driver: tokenizes every line of the index file "dsaa.txt" and prints
// one numbered line per index entry, labelled with the entry type that
// Tokenizer::getTokens reported. Always returns 0.
int main(int argc, char *argv[])
{
  // Output buffers filled in by getTokens. They start out as empty strings
  // so that nothing indeterminate is ever printed.
  char entry[80] = "";
  char subentry[80] = "";
  char page[24] = "";

  // Tokenizer's constructor takes a non-const char*. A string literal no
  // longer converts to char* in C++11 and later, so pass a writable array.
  char indexFilename[] = "dsaa.txt";
  Tokenizer t(indexFilename);
  IndexEntryType iType;
  int indexLineNo = 0;
  
  // It may be worthwhile to send the output to a file instead of cout
  
  while ( (iType = t.getTokens( entry, subentry, page )) != ENDOFFILE )
  {
      indexLineNo++;
      std::cout << indexLineNo << ": ";
      switch(iType){
      case SINGLEENTRY: 
        std::cout << "SingleEntry: " << entry << " " << page << std::endl;
        break;
      case SINGLERANGESTART: 
        std::cout << "SingleRangeStart: " << entry << " " << page << std::endl;
        break;
      case SINGLERANGEEND: 
        std::cout << "SingleRangeEnd: " << entry << " " << page << std::endl;
        break;
      case SUBRANGESTART: 
        std::cout << "SubRangeStart: " << entry << " " << subentry << " " << page << std::endl;
        break;
      case SUBRANGEEND: 
        std::cout << "SubRangeEnd: " << entry << " " << subentry << " " << page << std::endl;
        break;
      default:
        // ERROR (or any unexpected value): report it and move to the next line
        std::cout << "Erroroneous index entry found - skipping " << std::endl;
        break;
      }
  }   
  // "PAUSE" is a Windows shell command that keeps the console window open;
  // on other systems this call just fails without stopping the program.
  system("PAUSE");	
  
  return 0;
}

---------------------------------------------------------------------
---------------------------------------------------------------------

//Tokenizer.cpp


#include "Tokenizer.h"

// Opens the index file named by indexFilename for reading and reports the
// outcome on standard output. If the file cannot be opened, infile is left
// NULL; the object is still constructed and only a message is printed.
Tokenizer::Tokenizer(char* indexFilename)
{
    infile = fopen(indexFilename, "r");
    if( infile == NULL ){
      std::cout << "Error opening index file " << indexFilename << std::endl;
    }
    else
      std::cout << "Opened index file " << indexFilename << std::endl;
}

// Closes the index file, provided the constructor managed to open it.
Tokenizer::~Tokenizer()
{
    // infile is NULL when fopen failed; there is nothing to close then.
    if( infile == NULL )
        return;

    fclose(infile);
}

/*
 * Reads the next line of the index file and splits it into its tokens.
 *
 * Parameters (all are output buffers supplied by the caller):
 *   entry    - receives the main entry text, NUL terminated.
 *   subentry - receives the text after '!' when the line has a subentry;
 *              it is not touched for lines without a subentry.
 *   page     - receives the text between the second pair of braces.
 *
 * A line holds at most 79 characters (see the fgets call), so entry and
 * subentry never need more than 80 bytes. No buffer size is passed in, so
 * the caller is responsible for making the buffers large enough.
 *
 * Returns the kind of line that was read:
 *   SINGLEENTRY       IX: {Hutchinson, J. P.}               {12}
 *   SINGLERANGESTART  IX: {Big-Oh notation|(}               {14}
 *   SINGLERANGEEND    IX: {Big-Oh notation|)}               {16}
 *   SUBRANGESTART     IX: {Recursion!four basic rules|(}    {10}
 *   SUBRANGEEND       IX: {Recursion!four basic rules|)}    {10}
 *   ERROR             the line does not match any of the forms above
 *   ENDOFFILE         no more lines, or the index file could not be opened
 */
IndexEntryType Tokenizer::getTokens( char* entry, char* subentry, char* page )
{
    // The constructor only prints a message when fopen fails and leaves
    // infile NULL. Handing NULL to fgets is undefined behaviour, so an
    // unopened file is reported the same way as an exhausted one.
    if( infile == NULL ) return ENDOFFILE;

    // see DevC++ Help->Help on DevC++->An Introduction to Programming
    // ->Topics->File handling for details on fopen, fclose, fgets etc.
    if( fgets(line, 80, infile) == NULL ) return ENDOFFILE;

    // first skip over the "IX:" and get to opening "{". The test
    // for null \0 is to guard against going beyond the data in line.
    int ip = 0;
    while( line[ip] != '{' && line[ip] != '\0' ) ip++;

    if( line[ip] == '\0' ) return ERROR; // this shouldn't happen

    // if we get this far, we are positioned at '{'. Skip over it and pick
    // up the index entry. We will stop at '}', '|' or '!'
    ip++;
    int i = 0; // i is array index for entry
    while( line[ip] != '}' && line[ip] != '|' &&
           line[ip] != '!' && line[ip] != '\0' )
        entry[i++] = line[ip++];
    entry[i] = '\0'; // must null terminate the entry

    // we have the main entry. Now take care of range and subrange
    // entries

    if( line[ip] == '}' ){
        extractPage( line, ip, page ); // a helper function
        return SINGLEENTRY;
    }

    if( line[ip] == '|' ){
        ip = ip+1; // skip over '|'
        if( line[ip] == '(' ){
            extractPage( line, ip, page );
            return SINGLERANGESTART;
        }
        if( line[ip] == ')' ){
            extractPage( line, ip, page );
            return SINGLERANGEEND;
        }
        return ERROR; // '|' must be followed by '(' or ')'
    }

    if( line[ip] == '!' ){ // we have a subentry
        ip = ip+1; // skip over '!'

        // Copy the subentry text. It ends at the range marker '|' or at
        // the closing '}'. Any further '!' is kept as part of the text.
        int k = 0; // k is array index for subentry
        while( line[ip] != '}' && line[ip] != '|' && line[ip] != '\0' )
            subentry[k++] = line[ip++];
        subentry[k] = '\0'; // must null terminate the subentry

        if( line[ip] == '|' ){
            ip = ip+1; // skip over '|'
            if( line[ip] == '(' ){
                extractPage( line, ip, page );
                return SUBRANGESTART;
            }
            if( line[ip] == ')' ){
                extractPage( line, ip, page );
                return SUBRANGEEND;
            }
        }

        // IndexEntryType has no value for a subentry without a range
        // marker, so such a line (or a truncated one) is reported as an
        // error with an empty page.
        page[0] = '\0';
        return ERROR;
    }

    return ERROR;  // something is wrong in the index line.
}

// private methods.

// Extracts the page number from an index line, e.g. the "14" in {14}.
//
//   line - the NUL-terminated index line being parsed (this parameter hides
//          the member buffer of the same name).
//   ip   - position in line to start searching from; everything up to the
//          next '{' is skipped.
//   page - output buffer; receives the text between that '{' and the
//          following '}' (or the end of the line), NUL terminated.
//
// If no '{' is found from ip onwards, a message is printed and page is set
// to the empty string. No buffer size is passed in, so page must be large
// enough for whatever lies between the braces.
void Tokenizer::extractPage( char* line, int ip, char* page )
{
    int j = 0; // j is array index for page
    // skip characters in line until '{' found
    while( line[ip] != '{' && line[ip] != '\0' )
        ip++;
    if( line[ip] == '\0' ) 
    {
        std::cout << "Tokenizer::extractPage - error! no page number found in index line " << line << std::endl;
        page[0] = '\0';
        return;
    }
    ip = ip+1;   // skip over the '{'
    while( line[ip] != '}' && line[ip] != '\0' )
        page[j++] = line[ip++];
    page[j] = '\0'; // must null terminate the page
    return;
}

---------------------------------------------------------------------
---------------------------------------------------------------------

//Tokenizer.h


#ifndef TOKENIZER_H
#define TOKENIZER_H

#include <stdlib.h>
#include <stdio.h>
#include <iostream>        // Provides cout and cin

// Kind of index line reported by Tokenizer::getTokens.
enum IndexEntryType {
    SINGLEENTRY,       // plain entry with a page
    SINGLERANGESTART,  // entry followed by "|("
    SINGLERANGEEND,    // entry followed by "|)"
    SUBRANGESTART,     // entry!subentry followed by "|("
    SUBRANGEEND,       // entry!subentry followed by "|)"
    ERROR,             // the line could not be parsed
    ENDOFFILE          // no more lines to read
};

// Reads an index file line by line and breaks each line into its main
// entry, optional subentry, range indicator and page number.
class Tokenizer
{
public:
    // Opens indexFilename for reading; the file is closed by the destructor.
    Tokenizer(char* indexFilename);
    ~Tokenizer();
        
    /*
     * each call to getTokens will do the following:
     *  1. read a single line from the index file
     *  2. pick up the main entry
     *  3. pick up the range (start or stop) indicator
     *  4. pick up sub entry if present and its range start or stop
     *       indicator
     *  5. pick up the page number.
     *
     * The routine will load the parameter 'entry' and subentry 
     * (if found). The page number will be loaded in the 'page' 
     * parameter. The function will return value of enum type
     * IndexEntryType according to the entry type.
     */
    IndexEntryType getTokens( char* entry, char* subentry, char* page );
    
private:
    // Copying is disabled (declared here, never defined): the destructor
    // closes infile, so two copies would close the same FILE* twice.
    Tokenizer(const Tokenizer&);
    Tokenizer& operator=(const Tokenizer&);

    FILE* infile;   // the open index file, or NULL if it could not be opened
    char line[81];  // input buffer for a line from indexfile
    
    // helper: copies the page number found between '{' and '}' into page
    void extractPage( char* line, int ip, char* page );
    
};
#endif

------------------------------------------------------------------------------------------------------------------------------------------

What is your question?

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.