This is a somewhat big project for me as I'm not very experienced in c++. The end goal is to read a text file, and read every word in the text file. Each time a new word is read, store it in a vector. If a word is repeated, keep a counter for how many times the word was used.
Right now, I have code that is supposed to read every word, and then give me a final count on the total number of words. For some reason it reads the first 4 and then stops.

Can any one help me see my problem? Thanks in advance

#include "stdafx.h"
#include <iostream>
#include <string>
#include <sstream>
#include <fstream>
using namespace std;

int main()
{
	int i = 0;
	bool loop = true;
	string line;
	stringstream ss;
	string output;
	ifstream myFile ("const.txt");
	if (myFile.is_open())
	{
		while(myFile.good())
		{
			getline (myFile,line);
			ss << line;

			while (loop)
			{
				std::string st;
				ss >> st;
				i++;
				output+=st+" ";
				if (st == "")
				{
					loop = false;
				}
			}
		}
		myFile.close();
		cout << output << endl;
		cout << "number of words " << i;
	}
	else cout << "unable to open";

	system ("pause");
}

This is the output that I get

Provided by USConstitution.net
number of words 4Press any key to continue . . .

And here is the text file I'm reading from
http://www.usconstitution.net/const.txt

Edited 5 Years Ago by TheNNS: n/a

Well first off you never reset you loop variable to true after you exit your while loop. Secondly I would change line 18 to while(getline (myFile,line)) and then you can delete line 20. As a side note how are you going to handle punctuation marks?

Well first off you never reset you loop variable to true after you exit your while loop.

Why do I need to do that? It's supposed to read characters until it gets to an empty space, then it reads the next one, and so on. But it makes sense to use

while(getline (myFile,line))

instead.

As a side note how are you going to handle punctuation marks?

I'm thinking I'll get rid of punctuation and replace uppercase with lowercase letters.

The while loop you have is doing working for the first line because loop is true on the first iteration. When you read the next line in and go to the for loop loop is still false after reading in the first line so nothing will happen.

so how do i make it read the next line with text? how will it skip blank lines?

#include <iostream>
#include <fstream>
#include <string>

using namespace std;

int main( )
{
ifstream in("abc.txt");
string temp;
vector<string> vect;
while(getline(in,temp))
    vect.push_back(temp);

for(int i = 0; i != vect.size( ); i++)
    CONSOLE_Print(vect[i]);

return 0;
}

this should work, and it's smaller too

edit:
if you want to skip blank lines replace

while(getline(in,temp))
    vect.push_back(temp);

with

while(getline(in,temp))
{
    if(temp.empty( ))    
    vect.push_back(temp);
}

Edited 5 Years Ago by dospy: n/a

thanks for your replies. So far, my program will read all of the words from the text file correctly. The next thing I need to do is allocate memory for each file and save the pointers in a vector char. Does any one know the syntax to do that?

Here's is some pseudo code/c++ code

vector <char*> words;
while(there are words, keep reading them)
{
    words.push_back(word pointer);
}

Is this correct?
What is the proper syntax?

Edited 5 Years Ago by TheNNS: n/a

Why are you using a char*? If you are working with strings why not use the std::string?

Because it's for a class and they want us to learn the least efficient way possible.

This is what I have so far

ifstream in("const.txt");
	string temp;
	vector<string> words;
	vector<char*> phrase;
	vector<int> count;
	int counter = 0;
	while(in>>temp)
	{
		//gets rid of punctuation ',' '.' ';' and/or paranthesis
		for (int i = 0; i < temp.size(); i++)
		{
			if (temp[i] == ',' || temp[i] == '.' || temp[i] == ';' || temp[i] == '(' || temp[i] == ')')
			{
				temp = temp.erase(i);
			}
		}
		//sets all characters to lowercase to compare words
		for (int i = 0; i < temp.size(); i++)
		{
			temp[i] = tolower( (unsigned char) temp[i] );
		}
		//vect.push_back(temp);
		stringstream convert(temp); // stringstream used for the conversion initialized with the contents of Text
		int Result;
		if ( (convert >> Result) )
		{
			//if string is a number, do nothing
		}
		//if it's not a string, copy it to the array
		else
		{
			words.push_back(temp);
			char* place;

			//phrase[0]= &temp;
			for (int i = 0; i < phrase.size(); i++)
			{
				phrase[i] = (char*) malloc (i+1);
			}	
		}
	}

So I now have a vector of lowercase words with no punctuation. I need to store these memory addresses in a char* vector. What is the syntax for that?

This is my code so far. I know I have a lot of unused variable and a lot of weird comments, but it works.

int main()
{
	ifstream in("const.txt");
	string temp;
	bool add = true;
	vector<string> words;
	vector<char *> phrase;
	vector<int> count;
	int counter = 0;
	//phrase[0] = "";
	int v = 5;
	int* p = &v;
	int a = *p;
	char * cstr;
	//char* b = &v;
	while(in>>temp)
	{
		//gets rid of punctuation ',' '.' ';' and/or paranthesis
		for (int i = 0; i < temp.size(); i++)
		{
			if (temp[i] == ',' || temp[i] == '.' || temp[i] == ';' || temp[i] == '(' || temp[i] == ')' || temp[i] ==':')
			{
				temp = temp.erase(i);
			}
		}
		//sets all characters to lowercase to compare words
		for (int i = 0; i < temp.size(); i++)
		{
			temp[i] = tolower( (unsigned char) temp[i] );
		}
		//vect.push_back(temp);
		stringstream convert(temp); // stringstream used for the conversion initialized with the contents of Text
		int Result;
		if ( (convert >> Result) )
		{
			//if string is a number, do nothing
		}
		//if it's not a string, copy it to the vector
		else
		{
			/*This puts the words 'temp' into the string vector*/


			/*this puts the pointer for 'temp' into the char* vector*/

			char * writable = new char[temp.size() + 1];
			std::copy(temp.begin(), temp.end(), writable);
			writable[temp.size()] = '\0'; // don't forget the terminating 0

			// don't forget to free the string after finished using it
			//char * testStr = temp.c_str();
			//phrase.push_back(testStr);
	


			//checks for the word in the vector
			if (phrase.empty())
			{
				phrase.push_back(writable);
				words.push_back(temp);
			//	count.push_back();
			}
			else
			{
				for (int i = 0; i < phrase.size(); i++)
				{
					string temp2 = phrase[i];
					if (strcmp(writable, temp2.c_str())==0)
					{
						add = false;
					}

				}
				if (add)
				{
					phrase.push_back(writable);
					//cout<<writable;
					words.push_back(temp);
				}
			}
		//	delete[] writable;

		}
		add = true;
	//cout<<"finished else statement \n";
	}

//	for (int i = 0; i != phrase.size( ); i++)
//	{
//		string blank = " ";
//		cout <<phrase[i]<<endl;
//	}
	cout << "lenght of phrase " << phrase.size()<<endl;

	system ("pause");
}

the vector count needs to run parallel to phrase, so that every time a word is repeated, it stores the same offset in this array and a count of how many times that word was used. Any one have an idea of how I could do that?

If you are trying to ask how do you get the index of the element if it is already in the vector than you can get it in you if statement on line 68. after line 70 just add a line to assign i to some variable you crated at the start of main. Something like indexOfAlreadyExistingWord. I am a little curious as to how your instructor wants this done. There seams to be a lot of unneeded code. If I was to do this i would use a map with a key value of type string and a mapped value of type int. Here is an example of how I would do this

#include <map>
#include <iostream>
#include <string>
#include <fstream>

using namespace std;

int main()
{
    string word;
    size_t pos;
    map<string, int> wordList;
    ifstream fin("const.txt");
    while (fin >> word)
    {
        while ((pos = word.find_first_of(",.;:()123456789", 0)) != string::npos)  // this gets rid of puntuation and numbers
            word.erase(pos, 1);
        if (word.empty())
            continue;
        wordList[word] += 1;  // ads the word to the list and incremetns the counter
    }
    cout << "The number of words foud is " << wordList.size() << endl;
    cin.get();
    return 0;
}
Comments
thanks man

Yeah, I don't know why he wants to do all this crap. Apparently we'll learn maps next semester but that seems like it's so much faster.
Now that I have everything working, the vector of char*s, and the parallel vector of ints, I'm going to find the largest 10 values in the int vector and print those out with their word counterparts.

any good way to do this? I was thinking of iterating through the vector of ints like this

int tempCnt = 0;
int mostUsed[10];

for (int i; i < count.size(); i++)
           if (count[i] > tempCnt)
               tempCnt = count[i];
               mostUsed[0] = tempCnt;

This will only get the largest element, not the top ten, and I'm sure there a better way than having 10 if statements.

The only way I can think of to do that without having 10 max variables and 10 if statements would be to copy the integer vector into a temp vector then using 2 for loops go through and find the max. After you find the max delete that element in the vector and do it again. Something like this.

vector<int> tempCount = count;
int maxElements[10];
int max = 0;
size_t vecSize = tempCount.size()
for(int i = 0; i < 10; i++)
{
    for(size_t j = 0; j < vecSize;  j++)
    {
        if (tempcount[j] > max)
        {
            max = tempcount[j];
            maxElements[i] = j;
        }
    }
    tempCount.erase(tempCount.begin() + maxElements[i]);
    size--;
    max = 0;
}

Just though I'd let every know that I finished this lab, so we can close this thread, or at least stop posting in it. Here's my final solution.

#include "stdafx.h"
#include <iostream>
#include <string>
#include <sstream>
#include <fstream>
#include <vector>
#include <algorithm>
using namespace std;


int main()
{
	//reads in the text file
	ifstream in("const.txt");
	//each word read int he document gets stored in temp 
	string temp;
	//used to add a word to a vector or a counter
	bool add = true;
	//vector of strings used in parallel with phrase
	vector<string> words;
	//stores each word used in the text
	vector<char *> phrase;
	//stores the coutners for the words used
	vector<int> count;

	//reads in a word at a time
	while(in>>temp)
	{
		//gets rid of punctuation ',' '.' ';' and/or paranthesis
		for (int i = 0; i < temp.size(); i++)
		{
			if (temp[i] == ',' || temp[i] == '.' || temp[i] == ';' || temp[i] == '(' || temp[i] == ')' || temp[i] ==':')
			{
				temp = temp.erase(i);
			}
		}
		//sets all characters to lowercase to compare words
		for (int i = 0; i < temp.size(); i++)
		{
			temp[i] = tolower( (unsigned char) temp[i] );
		}
		//vect.push_back(temp);
		stringstream convert(temp); // stringstream used for the conversion initialized with the contents of Text
		int Result;
		if ( (convert >> Result) )
		{
			//if string is a number, do nothing
		}
		//if it's not a string, copy it to the vector
		else
		{

			/*I know this is weird, but it makes sense to me. It coverts a non constant char * to a constant char **/

			char * writable = new char[temp.size() + 1];
			std::copy(temp.begin(), temp.end(), writable);
			writable[temp.size()] = '\0'; 

			//checks for the word in the vector
			if (phrase.empty())
			{
				//if vectors are empty, add these values
				phrase.push_back(writable);
				count.push_back(1);
				words.push_back(temp);
			}
			else
			{
				//checks the vector. if the word is  already there, don't store it, but add a counter to the int vector
				for (int i = 0; i < phrase.size(); i++)
				{
					string temp2 = phrase[i];
					if (strcmp(writable, temp2.c_str())==0)
					{
						count[i]++;
						add = false;

					}

				}
				//if the words isn't there, add it
				if (add)
				{
					phrase.push_back(writable);
					count.push_back(1);
					//cout<<writable;
					words.push_back(temp);
					//cout<<temp;
				}
			}
		}
		add = true;//reset this value
	}

    //these vectors are copies of phrases and count, used to get the number of words frequently used
	vector<int> tempCount = count;
	vector<char*> tempPhrase = phrase;
	int maxNumbers[10];

	//
	cout << "Top 10 words in this document: \n";
	//finds the top 10 most used words and prints them
	for (int i = 0; i < 10; i++)
	{
		vector<int>::const_iterator largest = max_element(tempCount.begin(), tempCount.end());
	
		//*largest;
		cout << tempPhrase[largest-tempCount.begin()]<< "\tused "<< *largest <<" times\n";
		tempCount[largest-tempCount.begin()] = 0;
	}

	system ("pause");
}

I know this is really slow as it has to sort through the vector for every new word, but it's what the professor wanted.

Here is the pseudo for what ye' are wanting to do:

1. open the text file
2. perform error handling; check if file open was successful.
if unsuccessful, handle accordingly.

//Begin Loop//
3. using a loop, push each word into a temp_string.
4. pass the temp_string into a function that will strip away any punctuation.
5. pass the temp_string into a function that will make all letters lower-case.
6. pass the temp_string into a function that will determine if the string is already in a vector containing the text file. you can loop through every element of the vector to make a comparison, or use the find function in <algorithm>
7. if word is unique, push it into a vector of unique words.
8. if word is not unique, increment an identical vector<int> that corresponds to the vector of unique words; each element serving as a counter of the number of occurances each word being used in the text file.
//End Loop//

9. display ye' results
10. close fstream objects.

//Some of the vars you'll need
fstream infile;
string temp;
vector<string> unique_words;
vector<int> word_counters;
int words = 0;
int size  = 0;

//Here are some functions you might need
bool is_unique(string, vector<string>);
string strip_punct(string);
string make_lower(string);
int get_position(string, vector<string>);


//Here is the beefy part of the program, just to get ye' started

while(infile)
{
     //Read in a word from the text file
     infile >> temp;

     //Strip away any punctuation
     temp = strip_punct(temp);
     //Make all lower case (for comparison reasons)
     temp = make_lower(temp);
     
     //Get number of elements in the 'unique_words' vector for loop efficiency purposes
     size = unique_words.size();

     for(int i=0; i<size; i++)
     { 
          //If the word has never been used, add it to the 'unique_words' vector
          if(is_unique(temp, unique_words))
          { 
               //Increment counter of number of words in 'unique_words' vector
               words++;
               //Add the new word to the vector of unique words
               unique_words.push_back(temp);
               //Add one more element to your 'vector of counters'
               word_counters.resize(words);
               //Increment the counter corresponding to your newly added word
               word_counters[words]++;
          }
      
          else //word has been used before
          {
               //Get position of the word already in the 'unique_words' vector
               pos = get_position(temp, unique_words);
               //Increment the corresponding element in your vector of counters
               word_counters[pos]++;
          }
}

so now ye' will have something like this:

unique_words [how][now][brown][cow]
word_counters[12] [5] [42] [14]

Edited 5 Years Ago by Clinton Portis: n/a

Edit: fixed a bunch of mistakes. sorry, i do most of this stuff in my head... which unfortunately is not a good debugger.

my bad.

//Some of the vars you'll need
fstream infile;
string temp;
vector<string> unique_words;
vector<int> word_counters;
int words = 0;

//Here are some functions you might need
bool is_unique(string, vector<string>);
string strip_punct(string);
string make_lower(string);
int get_position(string, vector<string>);


//Here is the beefy part of the program, just to get ye' started

while(infile)
{
     //Read in a word from the text file
     infile >> temp;

     //Strip away any punctuation
     temp = strip_punct(temp);
     //Make all lower case (for comparison reasons)
     temp = make_lower(temp);

     //If the word has never been used, add it to the 'unique_words' vector
     if(is_unique(temp, unique_words))
     { 
          //Increment counter of number of words in 'unique_words' vector
          words++;
          //Add the new word to the vector of unique words
          unique_words.push_back(temp);
          //Add one more element to your 'vector of counters'
          word_counters.resize(words);
          //Increment the counter corresponding to your newly added word
          word_counters[words]++;
     }
      
     else  //Word has been used before
     {
          //Get position of existing word
          pos = get_position(temp, unique_words));
          //Increment its counter
          word_counters[pos]++;
     }
}

Edited 5 Years Ago by Clinton Portis: n/a

That's how I originally did it, but found out we had to use a vector of char*'s instead of strings. I like your code better though, much simpler.

Edited 5 Years Ago by TheNNS: n/a

This article has been dead for over six months. Start a new discussion instead.