Hi all,

I am a newbie to programming and have almost completed my assignment, 'Frequency of words and phrases', to a satisfactory standard. My program reads a text file and counts the number of phrases within the file (a phrase is defined as a sequence of one or more words); it counts phrases up to 10 words long.

e.g. 'The' occurs 25 times (one word phrase), 'negative refractive index is' occurs 12 times (four word phrase)

I can print out the phrases that occur more than once, however my program still includes phrases that occur once, which I do not want.

Can anyone help?

Thanks

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <queue>
#include <string>
#include <vector>

using namespace std;

// A phrase: an ordered sequence of words.
struct Phrase {
    // The words of the phrase, in order.
    vector<string> words;

    // Writes every word to standard output, each followed by one space
    // (the output therefore ends with a space and has no newline).
    void display() const
    {
        typedef vector<string>::const_iterator WordIter;
        for (WordIter w = words.begin(); w != words.end(); ++w)
            cout << *w << " ";
    }
};


// Global frequency table: maps each phrase to the number of times it has been
// counted. updateStat() increments the entries; display() and PhraseFreqPred
// read them.
map<Phrase, int> stat;

struct PhraseFreqPred {
                                  bool operator()(const Phrase& p1, const Phrase& p2)
                                {
                                  return stat[p1] > stat[p2];
                                 }
                                 };

// Ordering for Phrase, which is what lets Phrase be used as a map key:
// two phrases are compared through their word vectors.
bool operator<(const Phrase& lhs, const Phrase& rhs)
{
  const vector<string>& lhsWords = lhs.words;
  const vector<string>& rhsWords = rhs.words;

  // "rhs > lhs" on vectors is the same test as "lhs < rhs".
  return rhsWords > lhsWords;
}

// A list of phrases; main() groups phrases of equal word count into one of
// these and hands it to display().
typedef vector<Phrase> container_t;

// Prints the most frequent phrases of one word-count group.
//
//   c      - the phrases to show; taken by value so the sort below works on
//            a private copy and leaves the caller's container untouched
//   length - number of words per phrase, used only in the heading
//   limit  - maximum number of phrases to print
//
// Only phrases that occur more than once are printed: a phrase that was seen
// a single time is not a repeated phrase and is left out.
void display(container_t c, const int length, const int limit)
{
    sort(c.begin(), c.end(), PhraseFreqPred());
    cout << "Phrases of " << length << " word(s):\n";

    int shown = 0;
    for (container_t::const_iterator it = c.begin();
         it != c.end() && shown < limit; ++it)
    {
        // find() instead of operator[]: a lookup must not add entries.
        const map<Phrase, int>::const_iterator entry = stat.find(*it);
        const int occurrences = (entry == stat.end()) ? 0 : entry->second;

        // The phrases are sorted by descending count, so the first phrase
        // that occurs fewer than twice ends the list of repeated phrases.
        if (occurrences < 2)
            break;

        it->display();
        cout << ": " << occurrences << "\n";
        ++shown;
    }

    cout << "\n";
}

// Counts every phrase that starts at the front of `words`, then drops the
// front word so the next call starts one word further on.
//
// For a window w1 w2 ... wn this increments the counts of
// "w1", "w1 w2", ..., "w1 w2 ... wn" in the global `stat` table.
//
//   words - sliding window of the most recent words; its first element is
//           removed before returning
//   word  - not used; kept so existing callers keep compiling
void updateStat(list<string>& words, const string& word)
{
      (void)word;

      // Nothing to count, and pop_front() on an empty list is undefined.
      if (words.empty())
        return;

      Phrase cur;
      for (list<string>::const_iterator iter = words.begin();
           iter != words.end(); ++iter)
      {
        cur.words.push_back(*iter);   // extend the phrase by one word
        ++stat[cur];                  // and count this prefix
      }

      words.pop_front();
}

int main(){

           string filename;
           cout << "Enter filename: ";
           cin >> filename;            // inputting and reading input filename

ifstream input(filename.c_str());
  
           if (!input){
                       cerr << "Failed to open input file " << filename << endl;
                       return 1;
                        }                // error in reading the input file

  const int LIMIT = 10;

  string word;
  list<string> words;

  while (input >> word)
  {
    for (int i = 0; i < word.size(); ++i)
      word[i] = tolower(word[i]);

    words.push_back(word);

    if (words.size() == LIMIT)
    {     
      updateStat(words, word);
    }
  }

  while (!words.empty())
    updateStat(words, word);

  map<int, container_t> lengths;
  map<Phrase, int>::const_iterator iter1 = stat.begin();

  while (iter1 != stat.end())
  {
    const Phrase& p = iter1->first;

    lengths[p.words.size()].push_back(p);
    ++iter1;
  }

  map<int, container_t >::const_iterator iter2 = lengths.begin();

  while (iter2 != lengths.end())
  {
    const int length = iter2->first;
    const container_t& c = iter2->second;

    display(c, length, 5);

    ++iter2;
  }

  while (true) {  
               cout << "Do you wish to see phrases in more detail (y or n)?: ";			   
               string choice;
               cin >> choice;

               
               if (choice == "n")
               break;

               cout << "Enter number of word in phrase to display: ";
               int num;
               cin >> num;

               if (num < 1 || lengths.find(num) == lengths.end())
               cout << "Not found\n\n"<< endl;
               
               else {
                      const container_t& c = lengths[num];
                      display(c, num, c.size());
	       }
           
                       }

  return 0;
}

Also, I am having trouble making any character except 'Y' or 'y' terminate the programme. I have played about with the if statements and also thought about using switch(choice), but I keep getting errors!

while (true) {  
               cout << "Do you wish to see phrases in more detail (y or n)?: ";		   
               string choice;
               cin >> choice;

               
               if (choice == "n")
               break;

               cout << "Enter number of word in phrase to display: ";
               int num;
               cin >> num;

               if (num < 1 || lengths.find(num) == lengths.end())
               cout << "Not found\n\n"<< endl;
               
               else {
                       const container_t& c = lengths[num];
                       display(c, num, c.size());
	       }

>>e.g. 'The' occurs 25 times (one word phrase), 'negative refractive index is' occurs 12 times (four word phrase)

How about "The negative refractive index is" -- is that one phrase or two phrases? Or is it 5 one-word phrases? In other words, the description you have posted is ambiguous and makes no sense.


>>I am having trouble making any character except 'Y' or 'y' terminate the programme

>> if (choice == "n")
Then change that to if (choice != "y" && choice != "Y")

The left handed material is best described as a material that possesses simultaneous negative electric permittivity and magnetic permeability. Veselago theorised this concept in his Soviet Physics paper in 1967, and discusses the wonderful and weird properties that these materials exhibit. Since there is no natural substance possessing these properties the ideas of Veselago lay dormant for over thirty years, until theoretical physicist John Pendry published a paper in 1999 that discussed how artificial structures could be used to make a left handed material a reality.

The appears 3 times and is a phrase containing one word.
a appears 4 times and is a phrase containing one word.
Left handed material appears 2 times and is a phrase containing three words.
Left handed appears 2 times and is a phrase containing two words.
handed material appears 2 times and is a phrase containing two words.

I am looking for phrases of up to 10 words in text files that occur more than once. I hope that is a little clearer :-/


Thank you, Ancient Dragon; the use of && worked a treat.

I had been thinking along those lines, but probably way off with...

if ( choice != "y","Y")

I had been thinking along those lines, but probably way off with...

if ( choice != "y","Y")

Yes, you're off. You have to test each character individually. Something along the lines of if ((n < 5) || (n == 10)) which is TRUE if n is 4 or less OR equal to 10

Something along the lines of if ((n < 5) || (n == 10)) which is TRUE if n is 4 or less OR equal to 10

I am sorry I do not quite understand what you mean.

I have uploaded my program and an example text file. If you look at the output you will notice that there are a lot of phrases that occur only once; I do not want to display these.

Sorry for being so dumb guys:$

Attachments
The left handed material is best described as a material that possesses simultaneous negative electric permittivity and magnetic permeability. Veselago theorised this concept in his Soviet Physics paper in 1967, and discusses the wonderful and weird properties that these materials exhibit. Since there is no natural substance possessing these properties the ideas of Veselago lay dormant for over thirty years, until theoretical physicist John Pendry published a paper in 1999 that discussed how artificial structures could be used to make a left handed material a reality. Following his work there was new interest in the topic resulting in an exponential increase in the work involved with the left handed material, consequently the first left handed material was developed in 2001. 
Veselago considered the effect of mediums possessing negative electric permittivity and magnetic permeability at a certain frequency, the result was a negative refractive index. The refractive index expressed through Maxwells relation is often used as a measure of speed for an electromagnetic wave propagating within a material. In a negative refracting medium, waves travelling through the media move backwards. The velocity of these waves are described by the phase velocity which determines the rate the phase of a wave passes a given point in time. The group velocity describes the rate of the packet of waves are transported. For a left handed material the phase velocity and group velocity travel in opposite directions, because the energy flowing through the media follows the same direction regardless of the media, the group velocity and energy follow the same direction.
#include <iostream>     // header files
#include <fstream>  
#include <string>   
#include <map>          // associative container
#include <queue>        // container adaptor
#include <vector>       // sequence containers
#include <list>         // sequence containers

using namespace std;

// A phrase: an ordered sequence of words.
struct Phrase {
    // The words of the phrase, in order.
    vector<string> words;

    // Prints the words to standard output, a single space after each one;
    // no newline is written.
    void display() const
    {
        vector<string>::const_iterator cursor = words.begin();
        const vector<string>::const_iterator stop = words.end();
        while (cursor != stop)
        {
            cout << *cursor << " ";
            ++cursor;
        }
    }
};


// Global frequency table: maps each phrase to the number of times it has been
// counted. updateStat() increments the entries; display() and PhraseFreqPred
// read them.
map<Phrase, int> stat;

struct PhraseFreqPred {
  bool operator()(const Phrase& p1, const Phrase& p2)
  {
    return stat[p1] > stat[p2];
  }
};

// Ordering for Phrase, needed so that Phrase can be the key of a map:
// phrases are ordered by comparing their word vectors.
bool operator<(const Phrase& lhs, const Phrase& rhs)
{
  // Written as "rhs > lhs", which for vectors is the same test as "lhs < rhs".
  const bool lhsComesFirst = rhs.words > lhs.words;
  return lhsComesFirst;
}

// A list of phrases; main() groups phrases of equal word count into one of
// these and hands it to display().
typedef vector<Phrase> container_t;

// Prints the most frequent phrases of one word-count group.
//
//   c      - the phrases to show; taken by value so the sort below works on
//            a private copy and leaves the caller's container untouched
//   length - number of words per phrase, used only in the heading
//   limit  - maximum number of phrases to print
//
// Only phrases that occur more than once are printed: a phrase that was seen
// a single time is not a repeated phrase and is left out.
void display(container_t c, const int length, const int limit)
{
    // Most frequent phrase first (PhraseFreqPred compares the stored counts).
    sort(c.begin(), c.end(), PhraseFreqPred());
    cout << endl;
    cout << "Phrases of " << length << " word(s):\n";

    int shown = 0;
    for (container_t::const_iterator it = c.begin();
         it != c.end() && shown < limit; ++it)
    {
        // find() instead of operator[]: a lookup must not add entries.
        const map<Phrase, int>::const_iterator entry = stat.find(*it);
        const int occurrences = (entry == stat.end()) ? 0 : entry->second;

        // The phrases are sorted by descending count, so the first phrase
        // that occurs fewer than twice ends the list of repeated phrases.
        if (occurrences < 2)
            break;

        it->display();
        cout << ": " << occurrences << "\n";
        ++shown;
    }

    cout << "\n";
}

// Counts every phrase that starts at the front of `words`, then drops the
// front word so the next call starts one word further on.
//
// For a window w1 w2 ... wn this increments the counts of
// "w1", "w1 w2", ..., "w1 w2 ... wn" in the global `stat` table.
//
//   words - sliding window of the most recent words; its first element is
//           removed before returning
//   word  - not used; kept so existing callers keep compiling
void updateStat(list<string>& words, const string& word)
{
      (void)word;

      // Nothing to count, and pop_front() on an empty list is undefined.
      if (words.empty())
        return;

      Phrase cur;
      for (list<string>::const_iterator iter = words.begin();
           iter != words.end(); ++iter)
      {
        cur.words.push_back(*iter);   // extend the phrase by one word
        ++stat[cur];                  // and count this prefix
      }

      words.pop_front();
}

int main(){                                // main function

           string filename;
           cout << "Enter filename: ";
           cin >> filename;               // input filename

ifstream input(filename.c_str());
  
           if (!input){
                       cerr << "Failed to open input file " << filename << endl;
                       return 1;
                      }                   // error in reading input file
		   
string word;
list<string> words;

           while (input >> word){
                  for (int i = 0; i != word.size(); ++i)
                  word[i] = tolower(word[i]);
                  words.push_back(word);  // appends a single character to the string content, increasing its size by one

		   if     (words.size() == 10){   // phrases up to 10 words in length
                   updateStat(words, word);
                                      }
                                }
 
           while (!words.empty())        // program reads word by word and stores it into string content until an empty word is introduced
           updateStat(words, word);

  map<int, container_t> lengths;
  map<Phrase, int>::const_iterator iter1 = stat.begin();

  while (iter1 != stat.end()){
                              const Phrase& p = iter1->first;
                              lengths[p.words.size()].push_back(p);
                              ++iter1;
                             }

  map<int, container_t >::const_iterator iter2 = lengths.begin();

  while (iter2 != lengths.end()){
                                 const int length = iter2->first;
                                 const container_t& c = iter2->second;
                                 display(c, length, 5); 
                                 ++iter2;                          
                                 }      // displays the 5 most frequent phrases, for phrases ranging from 1 to 10 words in length


  while(true){  
	          cout << "If you wish to see phrases in greater detail please press [y]: ";		   
              string choice;
              cin >> choice;            // input for more details

               
		      if (choice != "y" && choice != "Y")
		      break;                    // program terminates if there is no inclusion of y character
			   
			   
		      cout << "Please enter the number of words you wish to see the phrases for [1-10]: ";
              int num;
              cin >> num;               // input of the number of words in a phrase

		      if (num < 1 || lengths.find(num) == lengths.end())
              cout << "Not found\n\n"<< endl;
                                        // standard output if there are no phrases for number specified
		      else{
              const container_t& c = lengths[num];
              display(c, num, c.size());
			      }
             }

  return 0; 
}


The appears 3 times and is a phrase containing one word.
a appears 4 times and is a phrase containing one word.
Left handed material appears 2 times and is a phrase containing three words.
Left handed appears 2 times and is a phrase containing two words.
handed material appears 2 times and is a phrase containing two words.

I still fail to see how you can tell a computer program how to determine what a phrase is. What distinguishes one phrase from another ? Do you remove the words "the", "a", "an", and "and", then what remains is a phrase ?

It's fine, I've managed to get it working!!!

Thank you for your help Ancient Dragon and WaltP.

If anyone wants the code PM me.

Take care

This question has already been answered. Start a new discussion instead.