954,480 Members — Technology Publication meets Social Media
Username:
Password:
Lost login information?
Have something to say? Contribute New Article Reply to this Article

distinct word count in C [new here]

Sir, i'm new here searched the forums and web.
I'm a beginner and dont know C very well.

The Question actually is this:
Write a C function called DistinctWords that counts the distinct words in its input string and prints them in the descending order of number of its occurrence. Precede each word by its count. Write a main function to test your code with different input values to demonstrate that your function is robust.

The function signature in C is as follows:

void DistinctWords(char s[]);

I thought strtok() might help and wrote this code:

# include <stdio.h>
# include <conio.h>
# include<string.h>
# include <stdlib.h>  // for "system("cls");" to clear screen

 // Forward declaration; the definition follows main().
 void distintwords(char s[]);
 // Driver loop: clear the screen, read one line, pass it to distintwords(),
 // then repeat until the key read by getch() is 'q'.
 int main()
 {   
     while(1)
     {
     system("cls");       
     char s[1000],x;
     printf("Enter the string: \n");
     // NOTE(review): gets() is given no buffer size, so a line longer than
     // 999 characters writes past the end of s.
     gets(s);
     distintwords(s);
     printf("\n\nPress ENTER to try again, 'q' to quit : ");
     // Only 'q' leaves the loop; any other key starts another round.
     x = getch();
     if (x=='q')
     break;
     }
 }

// Tokenises s in place with strtok() (delimiters: space , . - ? !) and prints
// each token on its own line. The inner loop is an unfinished attempt to
// remember the tokens in toks[] for later comparison.
void distintwords(char s[])
{
     // NOTE(review): j and k are declared but never used in this function.
     int slen,i,j,k;
     char * tok;
     char * toks[20];
     // NOTE(review): '==' compares and discards the result; slen is never assigned.
     slen == strlen(s);
     tok = strtok (s," ,.-?!");
     while (tok != NULL)
     {
      printf ("%s\n",tok);
      // Advance to the next token; this is NULL once the input is exhausted.
      tok = strtok (NULL," ,.-?!");
      // Stores the same token pointer into toks[1]..toks[9] on every pass.
      for(i=1;i<10;i++)
        {
           toks[i]=tok;
           // NOTE(review): tok[i] is a single char, but %s expects a char *;
           // tok may also be NULL here after the last token.
           printf ("Test: %s\n",tok[i]);
        }  
                    
     }
}


With the above code, I don't know how to assign each word to a different variable like tok[1], tok[2], etc., so that I can compare them.

The other code tried by my friend is:

# include <stdio.h>
# include <conio.h>
#include<string.h>
 // Forward declaration; the definition follows main().
 void distintwords(char s[]);
 // Reads one line, runs distintwords() on it once, then waits for a key press.
 int main()
 {
     char s[100];
     
     
     printf("enter the string");
     // NOTE(review): gets() is given no buffer size, so more than 99
     // characters of input write past the end of s.
     gets(s);
     distintwords(s);
     getch();
 }

 // Attempts to split s into space-separated words (stored in str[][]),
 // print the word count, and then report repeated words via strcmp().
 void distintwords(char s[])
 {
    // NOTE(review): p1 and p2 are never used; n is not initialised here.
    int arr[20],p1=0,p2=0,count=1,n;     
     char str[20][20];
     int i=0,k=0,v=0;

  // Word extraction. One row of str is written per outer iteration, i.e. once
  // per character of s, not once per word.
  for(i=0;s[i]!='\0';i++)
{
        k=0;
           // j is static and never reset, so j+i keeps moving forward through
           // s: j counts copied word characters, i the separators skipped.
           // NOTE(review): being static, j also keeps its value across calls.
           static int j=0;

        for(;s[j+i]!='\0'&& s[j+i]!=' ';j++)

        {

            str[v][k]=s[j+i];

                k++;

        }

        str[v][k]='\0';

            // NOTE(review): after the last word, later iterations read s[j+i]
            // beyond the terminating '\0'; v can also exceed the 20 rows.
            v++;

  }
       int words=0;

       int c=0;

// Count spaces; the number of words is taken to be spaces + 1.
while(s[c]!='\0')

{

if(s[c]==' ')

words++;

c++;

}

printf("%d\n",words+1);

     // Lengths are recorded for the first `words` rows only (the last word
     // is left out because the bound is i<words).
     for(i=0;i<words;i++)
     {
       arr[i]= strlen(str[i]);
     }
  for(i=0;i<words;i++)
  {
      for(k=0;k<words;k++)
     
     {
       // n is refreshed only when the two lengths match; otherwise the
       // test below sees whatever n held before.
       if(arr[i]==arr[k])
               n= strcmp(str[i],str[k]);
            if(n==0) 
            { 
                count++;
              printf("%d   %s",count,str[i]);
            }
                
                          
    else {
        // NOTE(review): this loop reuses i, the outer loop's own index.
        for(i=0;i<=words;i++)
         printf("\n %d  %s",count,str[i]);
        }
      }
   
   }
 }

Both outputs are very annoying.


So for now i'm left with this code:

# include <stdio.h>
# include <conio.h>
# include<string.h>
# include <stdlib.h>  // for "system("cls");" to clear screen

 // Forward declaration; the definition follows main().
 void distintwords(char s[]);
 // Driver loop: clear the screen, read one line, pass it to distintwords(),
 // then repeat until the key read by getch() is 'q'.
 int main()
 {   
     while(1)
     {
     system("cls");       
     char s[1000],x;
     printf("Enter the string: \n");
     // NOTE(review): gets() is given no buffer size, so a line longer than
     // 999 characters writes past the end of s.
     gets(s);
     distintwords(s);
     printf("\n\nPress ENTER to try again, 'q' to quit : ");
     // Only 'q' leaves the loop; any other key starts another round.
     x = getch();
     if (x=='q')
     break;
     }
 }

// Tokenises s in place with strtok() (delimiters: space , . - ? !) and prints
// each token on its own line. The inner loop is an unfinished attempt to
// remember the tokens in toks[] for later comparison.
void distintwords(char s[])
{
     // NOTE(review): j and k are declared but never used in this function.
     int slen,i,j,k;
     char * tok;
     char * toks[20];
     // NOTE(review): '==' compares and discards the result; slen is never assigned.
     slen == strlen(s);
     tok = strtok (s," ,.-?!");
     while (tok != NULL)
     {
      printf ("%s\n",tok);
      // Advance to the next token; this is NULL once the input is exhausted.
      tok = strtok (NULL," ,.-?!");
      // Stores the same token pointer into toks[1]..toks[9] on every pass.
      for(i=1;i<10;i++)
        {
           toks[i]=tok;
           // NOTE(review): tok[i] is a single char, but %s expects a char *;
           // tok may also be NULL here after the last token.
           printf ("Test: %s\n",tok[i]);
        }  
                    
     }
} 
     
     /*
     int arr[20],p1=0,p2=0,count=1,n;     
     char str[20][20];
     int i=0,k=0,v=0;

  for(i=0;s[i]!='\0';i++)
   {
     k=0;
     static int j=0;

        for(  ;s[j+i]!='\0'&& s[j+i]!=' ';j++)
        {
            str[v][k]=s[j+i];
            k++;
        }
        
        str[v][k]='\0';
        v++;
   }
   
int words=0;
int c=0;

while(s[c]!='\0')
{
  if(s[c]==' ')
  words++;
c++;
}   
printf("Total Number of Words: %d \n \n",words+1);

  for(i=0;i<words;i++)
     arr[i]= strlen(str[i]);
        
  for(i=0;i<words;i++)
    for(k=0;k<words;k++)
       if(arr[i]==arr[k])
         {
           n = strcmp(str[i],str[k]);
               if(n==0) 
                 { count++;
                   printf("%s : %d \n",str[i],count);
                 }
                
                else 
                 {
                   //for(i=0;i<=words;i++)
                   printf("%s : %d \n",str[k],count-1);
                 }
          }
}
*/

Sorry for the mess.

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

For me, the above just won't do. I couldn't care less about the "annoying" aspect of the output!

***WHAT*** is the problem that has you stumped --- EXACTLY.

What is your input - how many words do you need to handle. What is your output - what's wrong with it that you can't figure out.

No way am I going to fix some program for your assignment, because it annoys you.
;)

Welcome to the forum, btw. Now get your problem solving hat firmly in place, and get into this assignment's details.

Adak
Nearly a Posting Virtuoso
1,479 posts since Jun 2008
Reputation Points: 425
Solved Threads: 185
 

well its not my assignment actually and i dont think you might believe it. its my friends assignment and i'm trying to learn parallely. from their assignments.

Now about the code:

First code makes no sense i mean this code:

# include <stdio.h>
# include <conio.h>
# include<string.h>
# include <stdlib.h>  // for "system("cls");" to clear screen
 
 // Forward declaration; the definition follows main().
 void distintwords(char s[]);
 // Driver loop: clear the screen, read one line, pass it to distintwords(),
 // then repeat until the key read by getch() is 'q'.
 int main()
 {   
     while(1)
     {
     system("cls");       
     char s[1000],x;
     printf("Enter the string: \n");
     // NOTE(review): gets() is given no buffer size, so a line longer than
     // 999 characters writes past the end of s.
     gets(s);
     distintwords(s);
     printf("\n\nPress ENTER to try again, 'q' to quit : ");
     // Only 'q' leaves the loop; any other key starts another round.
     x = getch();
     if (x=='q')
     break;
     }
 }
 
// Tokenises s in place with strtok() (delimiters: space , . - ? !) and prints
// each token on its own line.
void distintwords(char s[])
{
     // NOTE(review): i is never given a value before it is used below.
     int i;
     char * tok;
     // NOTE(review): toks is declared but not used in this version.
     char * toks[20];
     // NOTE(review): slen is not declared anywhere in this function, and
     // '==' only compares; nothing is stored.
     slen == strlen(s);
     tok = strtok (s," ,.-?!");
     while (tok != NULL)
     {
      printf ("%s\n",tok);
      // Advance to the next token; this is NULL once the input is exhausted.
      tok = strtok (NULL," ,.-?!");
      // NOTE(review): tok[i] is a single char, but %s expects a char *;
      // tok may also be NULL here after the last token.
      printf ("%s\n",tok[i]);
      }
}


The above code works like this: input: "hi how are you"
output is:
hi
how
are
you


Thats it :(

then i added this part of code:

for(i=1;i<10;i++)
        {
           toks[i]=tok;
           printf ("Test: %s\n",tok[i]);
        }


thinking that each word might be alloted to each string variable, but its not whats happening :(

The output window is freezing :(

i'm using DEV C++ compiler on windows Seven.

A friend of mine suggested this example

for (int i = 0; i == strlen(s); i++)
    {
     if ( == '.' || string.charAt(i) == ':' || string.charAt(i) == ';' )
{

numberofWords += 1;

}
}
System.out.println(numberofwords);


Its JAVA ( i guess ) i'm so bad with C and i cant easily get through with JAVA. But this thing:if ( == '.' || string.charAt(i) == ':' || string.charAt(i) == ';' )
Seems interesting.


I'm making this whole thing so messy, sorry.

Can you Help me ???

:(

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

Sure, but the first thing I want you to do, is to throw your friends code, away. :)

This is an exercise to develop your ability to scan a char array and use logic on it. Forget strtok(), (it's for later), and let's get into top down design:

1) Analyze the problem by doing it by hand (in this case), and jot down the essential steps you use over and over, for each word.

2) Write down those steps, in the order you do them, in small steps of logic, in english.

3) Now, we'll take those steps, and write code to match. Putting off all the non-essential details, like output formatting, etc.

4) Once the essential parts are in place, (functions, and logic), we'll add in the other details, we had put off, previously.

You can't write a program to solve a problem, that you don't know how to solve, yourself. You have to know the steps, and the order of those steps.

Here, you have two states: outside a word, and inside a word, as you scan each char. Spaces and punctuation char's change that state from inside to outside.

You can't learn to swim by standing on the shore, watching others. Forget what the codes of others may do/not do, and let's get YOU into the water.

Don't worry - I'm a Water Safety Instructor. ;)

Adak
Nearly a Posting Virtuoso
1,479 posts since Jun 2008
Reputation Points: 425
Solved Threads: 185
 

Well the strtok() prog was my idea :(

I just can only think of reading a string by gets()

Counting the length by strlen()

And counting the words by counting the spaces or punctuation marks

I cant get any further .

And talking about the water how did you know i'm afraid of water

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

DELETED

Reason: understood the mistake but still stick with the code :(

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

gets() is VERY unsafe, since it has the defect of allowing the user to overfill the buffer (potentially crashing the whole program or even taking control of the machine it's running on).

Use fgets(). It works on ANY stream, (file, keyboard, etc.), allows you to limit the amount that will go into a char array, and makes you float better -- maybe! ;)

Think about it, and then write down your steps - how would you count words in text (a string)? Sounds like you have the idea. Would you do it somehow differently for every word, or could you use a loop, and do each word in the same way, using the loop?

What variables would you need? Maybe you can see what I'm getting to here. strtok() is a "water wing" crutch, for this exercise. Save it for later.

I can smell fear right over the internet. ;)

I remember once Lifeguarding for a mile swim, at a pool. A guy in leg braces and using heavy metal arm "crutches", wanted to sign up for the swim.

"Oh", he said "don't worry, I don't use these when I'm in the pool". That was great, because I couldn't let him in with them on - he'd sink like a rock.

"I can swim a mile, don't worry", he added. "I just get horizontal, and propel myself with my arms, and slightly plane up and down to breathe."

I let him swim, and he did OK. I found out later he swims the mile there, every year.

You can do this. Just get your head into it, and follow the steps I posted, earlier - and get used to using fgets(). It's the way you want to go, believe me.

Adak
Nearly a Posting Virtuoso
1,479 posts since Jun 2008
Reputation Points: 425
Solved Threads: 185
 
# include <stdio.h>
# include <conio.h>
# include<string.h>
# include <stdlib.h>  // for "system("cls");" to clear screen
 
 // Forward declaration; the definition follows main().
 void distintwords(char s[]);

 // Driver loop: clear the screen, read one line with fgets(), pass it to
 // distintwords(), then repeat until the user presses 'q' or input ends.
 // Returns 0 when the loop is left.
 int main()
 {
     char s[500];

     for (;;)
     {
         system("cls");
         printf("Enter the string: \n");
         // fgets() returns NULL on end-of-file or a read error and leaves s
         // unset in that case, so stop rather than analyse garbage.
         if (fgets(s, sizeof(s), stdin) == NULL)
             break;
         distintwords(s);
         printf("\n\n\nPress ENTER to try again, 'q' to quit : ");
         // Only 'q' leaves the loop; any other key starts another round.
         if (getch() == 'q')
             break;
     }
     return 0;
 }
 
// Prints simple statistics for s: a word count derived from the number of
// spaces, and a character count. For a non-empty string the word count is
// spaces + 1 and the character count is strlen(s) - 1; for an empty string
// both raw values are printed unchanged.
void distintwords(char s[])
{
    const char *cursor;
    int spaces = 0;
    int length = strlen(s);

    // Every blank is taken as a separator between two words.
    for (cursor = s; *cursor != '\0'; ++cursor) {
        if (*cursor == ' ')
            ++spaces;
    }

    if (length != 0) {
        printf("\n\nSTATS:\n\nWords: %d", spaces + 1);
        printf("\nCharacters:%d \n", length - 1);
        return;
    }

    // Empty input: report the raw counts.
    printf("\n\nSTATS:\n\nWords: %d", spaces);
    printf("\nSTATS:\nCharacters:%d \n", length);
}


I was just posting the above code and then read your valuable suggestions.
The above code is reading the chars and words fine. I mean its a very small step ahead.


I looked for fgets(); http://beej.us/guide/bgc/output/html/multipage/gets.html

WOW that was great. Thank You. i changed the gets(); to fgets(); :)

The fgets(); is adding an extra line so i just used

len-1


but it's suggested to use this code instead:

// Strips one trailing newline (such as the one fgets() stores) from s, in place.
//
// s:       NUL-terminated string to trim; if it is NULL nothing is done.
// returns: s itself, so the call can be nested inside another expression.
char *remove_newline(char *s)
{
    if (s == NULL)                    // nothing to trim
        return s;

    size_t len = strlen(s);           // size_t is what strlen() returns

    if (len > 0 && s[len-1] == '\n')  // if there's a newline
        s[len-1] = '\0';              // truncate the string

    return s;
}


Is it ok for now ?

I can feel flapping in water but cant move :(

I still cant get the UNIQUE word counting.
I can think of "strcmp();"

P.S: My English is bad, Excuse

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

Wow! That was great timing.

Your English is fine, no worries.

fgets adds on (if space is available), an end of string char: '\0', to the string. (Because a bunch of char's are just a bunch of char's, without an end of string marker to mark them as a string.)

gets() doesn't include the newline: '\n' char that is generated by the user whenever the enter key is pressed, into the saved string. fgets() does include it, whenever the whole line (newline included) fits in the buffer.

So you may have TWO unseen char's on the end of your fgets() string: a newline and an end of string char. You ALWAYS want that string buffer to be generously bigger than any line of char's you want to save as a string with fgets().

To find the unique words, we need to save each word, and compare it with all the other words. So we'll need wrds[] to become wrds[][20]. That is, it needs to be a 2 dimension char array.

How many words are you going to have to handle at any time, looking for unique words? We need a size for the first dimension of wrds[SOME_SIZE][20].

Then we can find unique words using strcmp(), after we save each word into the wrds[][] array.

Adak
Nearly a Posting Virtuoso
1,479 posts since Jun 2008
Reputation Points: 425
Solved Threads: 185
 

so does "SOME_SIZE" here refer to the length of each word ? :icon_rolleyes:

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

No. It refers to the number of rows (number of words), you want to process, in total.

SOME_SIZE is not the best name for it, but it IS the size for the first dimension of the wrds[][] array.

Adak
Nearly a Posting Virtuoso
1,479 posts since Jun 2008
Reputation Points: 425
Solved Threads: 185
 

oh ok.

So its not necessary for me to be using a particular number right ?
i mean for the time, can give some 20.

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 
firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 
s = "hello, how are you? hello !"


so this should become like s1 = "hello"
s2="how"
s3="are"
s4="you"
etc

then i can use:

strlen(); and strcmp();

To work around. but just the thing, how do i divide the sentence :( in to word ???

2D Array :(

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

You can have an array that is FIXED in size, and declared looking like an array:

char array[SIZE];

Or you can have an array that is FIXED in size, but declared looking like a block of memory arranged for an array:

char *array;

The latter one is generally used either:

1) To be cute and smart looking

2) Because the array size is unknown at this time, and the array memory will
be dynamically malloc'd later on.

The key thing is, "under the hood", arrays in C are really blocks of memory, arranged in a particular way. However, it's hard to work with arrays as "blocks of memory...". Non programmers won't lightly tolerate that kind of crap, and you shouldn't work with it that way, unless you're really comfortable with it.

One key concept in programming is to keep your code and logic CLEAR and SIMPLE. If you write your code "oh so cleverly", you (and others), will find it a witch to de-bug, update, or extend the program, later.

<< BE AS CLEAR AS YOU REASONABLY CAN IN YOUR CODE >>

For this program, we need to give it a size, but here's a help:

#define ROWS 100
#define COLS 20

Note, no semi-colons at the end of either line of code!

Put that right under your include lines of code, and then use them to make up your wrds[ROWS][COLS], array.

Any changes can be made by changing just one value, in one place.

Adak
Nearly a Posting Virtuoso
1,479 posts since Jun 2008
Reputation Points: 425
Solved Threads: 185
 

You already have divided the sentence into each word! When you counted it, remember?

When you find the first letter of a word, you start putting all letters into a row of the wrds array. Keep going until you hit the next space or punctuation char.

Then go to the next row. :)

You'll want a loop for that:

for(i=0;i<ROWS;i++)
  if(word[i] && OUTSIDE) { //add && !punctuation to this
    //it's the start of a new word
    OUTSIDE= 0;
    col=0; //row value is carried into this loop
    wrds[row][col]= word[i];
    col++;     
  }
  else if( //code to handle char's when you are already inside a word ) {
    //col variable is not reset to zero
    wrds[row][col]= word[i]; 
    col++;
    
  }
  //etc.


That kind of thing. Have an errand or two to run, back in 2 hours.

Adak
Nearly a Posting Virtuoso
1,479 posts since Jun 2008
Reputation Points: 425
Solved Threads: 185
 
# include <stdio.h>
# include <conio.h>
# include<string.h>
# include <stdlib.h>  // for "system("cls");" to clear screen
 
 // Forward declaration; the definition follows main().
 void distintwords(char s[]);

 // Driver loop: clear the screen, read one line with fgets(), pass it to
 // distintwords(), then repeat until the user presses 'q' or input ends.
 // Returns 0 when the loop is left.
 int main()
 {
     char s[500];

     for (;;)
     {
         system("cls");
         printf("Enter the string: ");
         // fgets() returns NULL on end-of-file or a read error and leaves s
         // unset in that case, so stop rather than analyse garbage.
         if (fgets(s, sizeof(s), stdin) == NULL)
             break;
         distintwords(s);
         printf("\n\nPress ENTER to try again, 'q' to quit : ");
         // Only 'q' leaves the loop; any other key starts another round.
         if (getch() == 'q')
             break;
     }
     return 0;
 }
 
// Counts spaces and punctuation in s, tries to split s into words stored in
// str1[][], tries to report repeated words, and finally prints a STATS
// summary (punctuation, blanks, words, characters).
void distintwords(char s[])
{
   // NOTE(review): k is never used.
   int i=0,j=0,k=0,c=0,R=0,len=0,words=0,pncts=0,spac=0,breaks=0;
   // One less than the string length; presumably this discounts the '\n'
   // that fgets() leaves at the end of the line read in main().
   len = strlen(s)-1;
   
   // Count blanks; despite its name, `words` holds the number of spaces.
   while(s[i]!='\0')
   {
     if(s[i]==' ') 
     words++;
     i++;
   }
   
 // Count the punctuation characters , . ; ? ! :
 while(s[j]!='\0')
   {
     if( s[j]==',' || s[j]=='.' || s[j]==';' || s[j]=='?' || s[j]=='!' || s[j]==':' ) 
     pncts++;
     j++;
   }
 spac=words; 
  //----------------------------------------------------------
    int arr[20],count=1,n;     
    char str1[20][20];

  // For every index i of s, copy the characters from s[i] up to the next
  // space into row R, then move to the next row.
  // NOTE(review): R advances once per character of s rather than once per
  // word, so rows hold overlapping tails of words, and input of more than 20
  // characters (or a run of 20+ non-space characters) writes outside str1.
  for(i=0;s[i]!='\0';i++)
     {
        c=0;j=0;
        for( ;s[j+i]!='\0' && s[j+i]!=' ';j++)
         {
            str1[R][c]=s[j+i];
            c++;
         }
        str1[R][c]='\0';
        R++;
    }
  //----------------------------------------------------------
    // Record the length of rows 0..words.
    for(i=0;i<=words;i++)
       arr[i]= strlen(str1[i]);
    
    for(i=0;i<words;i++)
     {
       for(c=0;c<words;c++)
         { arr[c]=strlen(str1[c]);
          // n is refreshed only when the two lengths match; otherwise the
          // test below sees the value left by an earlier comparison.
          if(arr[i]==arr[c])
             n= strcmp(str1[i],str1[c]);
           if(n==0) 
             { 
               count++;
               printf("%d   %s\n",count,str1[i]);
             }
                        
           else 
           {
            // NOTE(review): this loop reuses i, the outer loop's own index.
            for(i=0;i<=words;i++)
            printf("\n %d  %s",count,str1[i]);
           }
    }
   
  }
breaks=spac+pncts;
//test
// Debug dump of arr[0..breaks].
// NOTE(review): arr has 20 elements; breaks is not checked against that.
for(i=0;i<=breaks;i++)
printf("\narr[%d] = %d",i,arr[i]);

  //----------------------------------------------------------

 // Treat the input as containing no words when it is empty or consists only
 // of spaces and/or punctuation.
 if (len==0 || spac==len || pncts==len || len==(spac+pncts))
   {
     printf("\n\nSTATS:\n");
     printf("\nWords        :0");
     printf("\nPunctuations :%d",pncts);
     printf("\nBlank Spaces :%d",spac);     
     printf("\nCharacters   :%d",len);
   }
 
 else 
   {
     printf("\n\nSTATS:");
     printf("\nPunctuations :%d",pncts);
     printf("\nBlank Spaces :%d",spac);
     // Word count is reported as number of spaces + 1.
     printf("\nWords        :%d",words+1); 
     printf("\nCharacters   :%d",len);
     printf("\n\n NOTE:\n(Characters includes SPACES and Punctuations)");
   }
}

Its all gone wrong :(

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 
for(i=0;i<spac;i++)
  if(s[i] && " ") 
    {
    outside= 0;
    col=0; 
    wrds[row][col]= word[i];
    col++; 
    }
  
  else 
    {
        wrds[row][col]= word[i]; 
        col++;
    }
 for(i=0;i<=col;i++) 
 printf("\n %d : %s",i,word[i]);

When ever i see a SPACE i need to start saving the characters to a new colum.
But what is OUTSIDE ??

Please help :(

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

outside is one of the two states your program can be in, as it scans through the text. Just a "flag", and doesn't need to be capitalized.

When you reach a space (or punctuation mark), you should start saving to a new row, not a new column.

I'll study your code. Watching the Tour de France, just now though.

Adak
Nearly a Posting Virtuoso
1,479 posts since Jun 2008
Reputation Points: 425
Solved Threads: 185
 

I thought of "strncpy", but I think it's not possible either.

Ok have a good time :)

firoz3321
Light Poster
28 posts since Jul 2010
Reputation Points: 10
Solved Threads: 0
 

This article has been dead for over three months

Post: Markdown Syntax: Formatting Help
You
 
View similar articles that have also been tagged: