The question goes like this:

You are given a file in which each line contains a list of space separated words.
Write a program to find out:
1. List of words that are present in all the lines (words common to all lines)
2. List of unique words that are present in the file
3. The length of the line and file can be of any arbitrary size and your program can *not* hardcode either one of them. To reiterate, your program cannot allocate an array of arbitrary size or malloc an arbitrarily huge size in the beginning. It should dynamically allocate memory as and when required.

Following are the requirements for the output of your program:
 The output of your program should contain two lines.
• First line will have a space separated list of the words that are present in all the lines.
• The next line will have a space separated list of all words occurring in the file. If the same word appears more than once in two different lines, your program should print it only once.
 The order of the words printed in the output should be the same as the order in which the words occur in the input file.

For example if the input-file by name "a.txt" contains:

good cat rat mat bat sat fat great bad
bad good great sat fat fun
cat bat fat mat sat good great fun find

The output expected is:

common words: good sat fat great
unique words: good cat rat mat bat sat fat great bad fun find

Notice that the order in which the words are printed by your program is important. The output

common words: good sat fat great

is NOT the same as

common words: sat fat good great

because that is not the order in which these words appear in the file.

Your program will be invoked as
<your_program> a.txt
That is the file name containing lines with space separated words will be passed as a command line argument to your program.

Your program should strictly adhere to the format of output given in the problem statement for being considered for evaluation.
With respect to the input file you can assume that:
• There are no blank lines
• There is at least one word in each line
• A word never repeats itself in the same line

I have written the following code:

#include<stdio.h>
#define ch_limit 500

struct node {
	char word[ch_limit];
	int count;
	struct node *next;
};

typedef struct node *list;

int count_lines(FILE *fp)
{
return;
}

list populate(char *word,list first)
{
        list temp,new=first;
        int value=0;
        temp=(list)malloc(sizeof(list));
        strcpy(temp->word,word);
        temp->count=0;
        temp->next=NULL;

        
        if(first==NULL){
                first=temp;
                return first;
        }

        while(new->next!=NULL){
                if(strcmp(new->word,word)==NULL){
                        value=new->count;
                        value+=1;
                        new->count=value;
                        free(temp);
                        return(first);
                }else{
                        printf("I am inside");
                        new=new->next;
                }
        }
        if(strcmp(new->word,word)==NULL){
                value=new->count;
                value+=1;
                new->count=value;
                return(first);
        }
        new->next=temp;
        return(first);
}

list read_word(FILE *input_file)
{
	char word[ch_limit];
	int ch;
	int i,count=0;
	list first=NULL;
	
	while((ch=fgetc(input_file)) !=EOF){
		
		for(i=0;(ch!=' ') || (ch!='\n');i++){
			word[i]=ch;
			ch=fgetc(input_file);
			if (ch == EOF)
				break;
		}
			word[i]='\0';
			first=populate(word,first);	
			
	}
	return(first);
}

int display(list first,int line_nos)
{
	list unique,common;
	common=unique=first;
	if(unique==NULL){
		printf("The list of unique words is empty\n");
		return;
	}
	printf("Unique List: ");
	while(unique){
		puts(unique->word);
		printf(" ");
		unique=unique->next;
	}
	printf("\n");

	if(common==NULL){
		printf("The list of common words is empty\n");
		return;
	}

	printf("Common List: ");
	while(common){
		if(common->count!=line_nos){
			puts(common->word);
			printf(" ");
		}
		common=common->next;
	}
	return;	
}


int main(int argc, char *argv[])
{
	FILE *input_file;
	int line_nos;
	list first=NULL;
	
	if(argc!=2) {
		printf("Usage: %s filename\n",argv[0]);
		exit(0);
	}
	if((input_file=fopen(argv[1],"r"))==NULL){
		printf("Error in opening the file\n");
		exit(0);
	}

	line_nos=count_lines(input_file);

	first=read_word(input_file);
	
	display(first,4);

	close(input_file);
}

However, the output obtained for the above code is:

# ./a.out txt.txt
List: go so go mo bo
go inter yak lo
jai go kim yoka
my go bat cat

Common List: go so go mo bo
go inter yak lo
jai go kim yoka
my go bat cat

Please help me in debugging whats wrong in the code :(((

What are the contents of the txt.txt file?

go so go mo bo
go inter yak lo
jai go kim yoka
my go bat cat

I tried to compile it with VC++ 2008 Express and got a lot of warnings. Most of them were about functions that are declared to return an int but return nothing. You should either add return a number or make the functions void.

line 21: you are allocating too little space. list is a pointer and what you want is the size of the struct node. replace list with struct node


line 33: strcmp() does not return NULL -- it returns 0. VOID may or may not be defined as 0, depending on your compiler. strcmp() returns an integer.

line 130: close() is undefined. You probably meant fclose()

Well at the very least you have a problem in line 63 in your original code:

for(i=0;(ch!=' ') || (ch!='\n');i++)

The above criteria will always be true. You have an OR (||). You need an AND (&&). You are thus only calling the populate function once rather than once per word. Your count_lines function:

int count_lines(FILE *fp)
{
return;
}

doesn't return an int. You also call that function but don't appear to do anything with the return value (which is good since you aren't returning anything from the function). When I changed the "||" to "&&" I got a bunch of calls to the populate function and a bunch of displays of "I am inside". The program still has problems but it at least gets that far now.

Here is a version that doesn't crash when ran. I completly replaced your read_word() function to use fscanf() -- its a lot easier when just reading space-deliminated words from a file.

The program does not meet all the requirements, but at least it doesn't crash.

#include<stdio.h>
#include <stdlib.h>
#include <string.h>
#pragma warning(disable: 4996)

#define ch_limit 500

struct node {
	char word[ch_limit];
	int count;
	struct node *next;
};

typedef struct node *list;

int count_lines(FILE *fp)
{
return 0;
}

list populate(char *word,list first)
{
        list temp = NULL, node = NULL;
        int value=0;
        temp=(list)malloc(sizeof(struct node));
        memset(temp,0,sizeof(struct node));
        strcpy(temp->word,word);
        temp->count=0;
        temp->next=NULL;

        
        if(first==NULL){
                return temp;
        }
        node = first;
        while(node->next!=NULL){
                if(strcmp(node->word,word)==0){
                        node->count++;
                        free(temp);
                        return first;
                }else{
                        //printf("I am inside\n");
                        node = node->next;
                }
        }
        if(strcmp(node->word,word)==0){
                node->count++;
                free(temp);
                return(first);
        }
        node->next=temp;
        return(first);
}
list read_word(FILE *input_file)
{
    char word[ch_limit] = {0};
	list first=NULL;
    while( fscanf(input_file, "%s", word) > 0 )
    {
			first=populate(word,first);	
    }
    return first;
}
/*
list read_word(FILE *input_file)
{
	char word[ch_limit];
	int ch;
	int i,count=0;
	list first=NULL;
	
	while((ch=fgetc(input_file)) !=EOF){
		
		for(i=0;(ch!=' ') || (ch!='\n');i++){
			word[i]=ch;
			ch=fgetc(input_file);
			if (ch == EOF)
				break;
		}
			word[i]='\0';
			first=populate(word,first);	
			
	}
	return(first);
}
*/

void display(list first,int line_nos)
{
	list unique,common;
	common=unique=first;
	if(unique==NULL){
		printf("The list of unique words is empty\n");
		return;
	}
	printf("Unique List: ");
	while(unique){
		puts(unique->word);
		printf(" ");
		unique=unique->next;
	}
	printf("\n");

	if(common==NULL){
		printf("The list of common words is empty\n");
		return;
	}

	printf("Common List: ");
	while(common){
		if(common->count!=line_nos){
			puts(common->word);
			printf(" ");
		}
		common=common->next;
	}
	return;	
}


int main(int argc, char *argv[])
{
	FILE *input_file;
	int line_nos;
	list first=NULL;
	if(argc!=2) {
		printf("Usage: %s filename\n",argv[0]);
		exit(0);
	}
	if((input_file=fopen(argv[1],"r"))==NULL){
		printf("Error in opening the file\n");
		exit(0);
	}

	line_nos=count_lines(input_file);

	first=read_word(input_file);
	
	display(first,4);

	fclose(input_file);
}

My logic: I have a single list with all the words in the file. I will increment the count when the word appears again in the file. At the end, the words whose count is same as the no. of lines will be the common words.

The code will satisfy all the criterion.

Please let me know if you have any other logic which can be more optimising.

my code for my logic:

[code=
#include<stdio.h>
#include<string.h>
#define CH_LIMIT 500


struct node {
char *word;         /* Points to the word */
int count;      /* ref.count */
struct node *next;  /* Points to next node */
};


typedef struct node *list;



int count_lines(FILE *input_file)
{
int ch,lines = 0;
while ( (ch = fgetc(input_file)) != EOF )
{
if ( ch == '\n' )
{
++lines;
}
}
fseek(input_file,0,SEEK_SET);
return(lines);
}



list populate(char *word,list first)
{
list temp,new=first;
int word_len;
int value = 0;
temp = (list)malloc(sizeof(*first));
word_len = strlen(word);


/* Build the node with the read word */
/* Allocating dynamic memory for the word read */


temp->word = (char *) malloc(sizeof(word_len+1));
strcpy(temp->word,word);
temp->count=0;
temp->next=NULL;


if(first==NULL){
first=temp;
return first;
}


while(new->next!=NULL){
if(strcmp(new->word,word)==NULL){
(new->count)++;
free(temp->word);
free(temp);
return(first);
}else{
new = new->next;
}
}
if( strcmp(new->word,word) == NULL ){
(new->count)++;
return(first);
}
new->next = temp;
return(first);
}



list read_word(FILE *input_file)
{
char word[CH_LIMIT] = {0};
list first=NULL;
while( fscanf(input_file, "%s", word) > 0 ){
first = populate(word,first);
}
return(first);
}



int display(list first, int line_nos)
{
list unique,common;
common = unique = first;


/* Common words are those whose count is same as the number of lines
* in the text which satisfies the requirement that the common word
* should be present once in all the lines of the text file.
*/
printf("common words: ");
while(common){
/* If there is only one line in the text, the common list
* has to be empty since they cannot be compared with any
* other lines. Hence we are checking (line_nos!=1).
*/
if(line_nos != 1){
if(common->count>=(line_nos-1)){
printf("%s",common->word);
printf(" ");
}
}
common=common->next;
}
printf("\n");


printf("unique words: ");
while(unique){
printf("%s",unique->word);
printf(" ");
unique=unique->next;
}
printf("\n");
return;
}


int main(int argc, char *argv[])
{
FILE *input_file;
int line_nos;
list first=NULL;


if(argc!=2) {
printf("Usage: %s <filename>\n",argv[0]);
exit(1);
}


if((input_file=fopen(argv[1],"r"))==NULL){
printf("Error: Unable to open %s\n",argv[1]);
printf("Usage: %s <filename>\n",argv[0]);
exit(2);
}


line_nos=count_lines(input_file);


first=read_word(input_file);


display(first,line_nos);


fclose(input_file);


exit(0);
}
code]

Edited 3 Years Ago by happygeek: fixed formatting

Did you wonder how come Ancient Dragon's code post looks so nice and your post is all scattered over, and hard to read?
Please read.

This question has already been answered. Start a new discussion instead.