I go to Rutgers, NB, and I'm taking a class that requires the use of C without teaching us much of C, so sorry if I'm a bit uneducated.

My first question is, when using fgets(), how do I get the next line? I have

while((fgets(line,MAX,file)) != NULL){

to get the first line of a text file, but how can I keep reading it, line by line?

Secondly, I have to parse the string input and pretty much ignore all non-alphabetic characters to retrieve words (a word is defined as alphabetic characters surrounded by either a NULL or non-alpha character)

weird-$con@tent.

would produce the output

word1 = weird
word2 = con
word3 = tent

and I was wondering if there were a built-in way to do this, perhaps using strtok().

Thanks for any help.

Recommended Answers

All 16 Replies

>>but how can I keep reading it, line by line?

That's the purpose of the while statement that you posted. Every time fgets() is called it will read the next line in the file. The "!= NULL" says to keep reading until end-of-file is reached or some other error occurs.

Yes, strtok() is one way (and probably the easiest) to extract the words.

so, to clarify, if I put the fgets() line in a loop, such as

while(c != EOF) //c is the character i'm reading from the line from fgets

it will keep producing lines of length max-1, save for errors or end of line/file?

and is there a way to tell strtok to tokenize ignoring all non-letter characters?

no, EOF is not a char, and you don't need it anyway, just while( fgets(...) ) will do the trick

>>is there a way to tell strtok to tokenize ignoring all non-letter characters?
Probably not. It might be easier to use a pointer and test each character with isalnum(), which accepts 'a'-'z', 'A'-'Z' and '0'-'9'. Just use isalpha() if you don't want numeric digits.

alright thanks, right now, the problem occurring is that my test case is two lines

Some ?Random> (random12) weird-$con@tent.
Madam, I’m adam.

and the output, when i run the loop to read this, is

Madam, I?m adam.andom12) weird-$con@tent.

which is a mixture of the first and second lines. any ideas on why this would happen?

>> any ideas on why this would happen?

Yes, but it's not worth posting until I see your code.

http://jump.fm/OQUOJ

right now, all it does is identify the last letter 't' as a palindrome and can't count the words. it also says there's only one line because I keep getting that mixture of the input file, posted above, as a single line.

Dude, if you want help on the forum, then post your code on the forum. I went to your link and all I could do was get advertising and crap. Your file is apparently there, but you can't d/l it.

Get real if you want help.

To post the code, just copy it to the clipboard and paste it here in the editor. Put [code] /* your code goes here */ [/code] code tags around the code, as in this example.

/*
 * wordstat.c
 *
 *  Created on: Oct 5, 2010
 *      Author: charlieg
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MAX 101				//max number of char per line

/* One node of a singly linked list of words, each with an occurrence count. */
struct pal{
	char *word;			/* text of the word; the struct itself does not say who owns this memory */
	short int count;	/* occurrences; main() stores -1 here to mark a head node that holds no word yet */
	struct pal *next;	/* following node; the list walkers in this file stop when this is NULL */
};

/* Returns 0 when c reads the same forwards and backwards, 1 otherwise. */
int isPalindrome(char *c);
/* Merge-sorts the list by word (strcmp order) and returns the new head. */
struct pal *mergesort1(struct pal *front);
/* Merges two word-sorted lists into one and returns its head. */
struct pal *merge(struct pal *front1, struct pal *front2);
/* Folds adjacent nodes holding equal words into one node and bumps its count. */
void condense(struct pal *front);

/*
 * Entry point: expects exactly two arguments, <option> and <filename>.
 * Reads the file line by line, tries to split every line into alphabetic
 * words, collects palindromes in a linked list of struct pal, sorts and
 * condenses that list, then prints the statistic selected by argv[1]
 * (-h, -l, -w, -p or -pf).
 *
 * Exit status: 0 on success, -1 for a wrong argument count, -2 when the file
 * cannot be opened, -3 for an unknown option.
 *
 * NOTE(review): this listing is kept exactly as posted because the replies
 * that follow discuss its statements one by one.  The defects visible in it
 * are flagged inline.
 */
int main(int argc, char* argv[]){

	/*int w;

	for(w=0;w<argc;w++){
		printf("%s\n",argv[w]);
	}*/

	/* NOTE(review): `lines` is never incremented below, so -l always prints 0. */
	unsigned long int lines = 0, words = 0;
	unsigned short int wl = 0;  //word length
	int  c;
	char line[MAX];
	char word[MAX];
	struct pal *new, *front;

	/* Placeholder head node: count == -1 means "no palindrome stored yet".
	 * NOTE(review): the malloc result is not checked, and front->word and
	 * front->next are left uninitialised. */
	front = (struct pal*)malloc(sizeof(struct pal));
	front->count = -1;


	if(argc != 3){
		fprintf(stderr, "Run ./wordstat <option> <filename> "
				"(type -h as option for help)\n");
		exit(-1);
	}

	/* NOTE(review): `file` is never passed to fclose() in this function. */
	FILE *file = fopen(argv[2], "r");
	if(file==NULL){
		fprintf(stderr, "%s is not a readable .txt file\n",argv[2]);
		exit(-2);
	}


	/* fgets() returns NULL at end-of-file or on error, which ends the loop.
	 * NOTE(review): the garbled output quoted in the thread would be consistent
	 * with the input using bare '\r' line endings (fgets() splits only on '\n',
	 * and a terminal jumps back to column 0 on '\r') -- confirm by dumping the
	 * bytes of the input file. */
	while((fgets(line,MAX,file))){					//for some reason, this produces
		int i;										//line of second mixed with first
		printf("%s\n",line);
		fflush(stdout);
		/* NOTE(review): the scan stops only at '\n'.  A line without one (the
		 * last line of a file, or a line longer than MAX-1 characters) makes i
		 * run past the terminating '\0'. */
		for(i = 0; line[i] != '\n'; i++){
			c = line[i];

			if(isalpha(c)){
				/* Letter: append to the word being built.  Control then falls
				 * through to the palindrome test below. */
				word[wl] = c;
				wl++;
			}else{
				if(wl>0){
					/* Non-letter after at least one letter: the word is complete. */
					word[wl] = '\0';
					printf("%s",word);			//for testing
					fflush(stdout);
					words++;
				}else{
					continue;					//if it's a single non-alpha
				}								//it's not a word
			}

			/* NOTE(review): this runs after every letter as well as after every
			 * completed word, so word[] is usually not '\0'-terminated yet when
			 * it is handed to isPalindrome(). */
			if(isPalindrome(word) == 0){
				new = (struct pal*)malloc(sizeof(struct pal));
				/* NOTE(review): stores the address of the shared buffer, not a
				 * copy; every node therefore points at the same characters. */
				new->word = word;
				new->count = 1;
				if(front->count == -1){
					/* NOTE(review): new->next is not set on this path and the
					 * placeholder node is no longer reachable. */
					front = new;
				}else{
					new->next = front;
					front = new;
				}
			}

			/* NOTE(review): executed on every iteration, so each letter is
			 * written to word[0]. */
			wl = 0;
		}
	}

	/* Sorting and condensing happen before the option is examined. */
	front = mergesort1(front);		//sort
	condense(front);				//condense duplicates

	if(strcmp("-h",argv[1])==0){
		printf("Usage: ./wordstat <option> <filename>\nOptions:\n"
				"%40s\n%40s\n%44s\n%44s\n", "-l: number of lines",
				"-w: number of words","-p: palindrome's statistics", "-pf: palindrome's frequency");
		return 0;
	}else if(strcmp("-l",argv[1])==0){
		printf("%lu\n", lines);
		fflush(stdout);
		return 0;
	}else if(strcmp("-w",argv[1])==0){
		printf("%lu\n", words);
		fflush(stdout);
		return 0;
	}else if(strcmp("-p",argv[1])==0){
		/* Print every stored word, one per line. */
		struct pal *ptr;
		ptr = front;
		while(ptr != NULL){
			printf("\t%s\n", ptr->word);
			fflush(stdout);
			ptr = ptr->next;
		} return 0;
	}else if(strcmp("-pf",argv[1])==0){
		/* Print every stored word followed by its count. */
		struct pal *ptr;
		ptr = front;
		while(ptr != NULL){
			printf("\t%s\t\t\t%d\n", ptr->word, ptr->count);
			fflush(stdout);
			ptr = ptr->next;
		} return 0;
	}else{
		/* NOTE(review): reports argv[2] (the file name) although the value
		 * that failed to match is argv[1]. */
		fprintf(stderr, "%s is not a valid option\n",argv[2]);
		exit(-3);
	}
	return 0;
}

/*
 * Tells whether the string c reads the same in both directions.
 * The comparison is exact: case and spaces count.
 *
 * Returns 0 when c is a palindrome (an empty string qualifies) and
 * 1 when it is not.  Note the sense: 0 means "yes".
 */
int isPalindrome(char *c)
{
   int left;
   int right;

   /* Walk inwards from both ends until the two indices cross. */
   for (left = 0, right = strlen(c) - 1; left <= right; left++, right--) {
      if (c[left] != c[right])
         return 1;
   }

   return 0;
}

/*
 * Sorts a singly linked list of struct pal nodes by word (strcmp order)
 * with a top-down merge sort.
 *
 * front: head of the list, may be NULL.  Every node must have a valid
 *        word and a next pointer that is either a node or NULL.
 * Returns the head of the sorted list.  Nodes are relinked in place;
 * nothing is allocated or freed.
 */
struct pal *mergesort1(struct pal *front){
	struct pal *slow;
	struct pal *fast;
	struct pal *second;

	/* Zero or one node: already sorted. */
	if((front == NULL) || (front->next == NULL)) return front;

	/* Find the middle: `fast` moves two nodes for each node `slow` moves.
	 * `fast` must advance from its own position; recomputing it from `slow`
	 * moves it only one node per round and yields an (n-2, 2) split, which
	 * makes the sort quadratic and the recursion as deep as the list is long. */
	slow = front;
	fast = front->next;

	while((fast != NULL) && (fast->next != NULL)) {
		slow = slow->next;
		fast = fast->next->next;
	}

	/* Cut the list after `slow`; both halves are non-empty here. */
	second = slow->next;
	slow->next = NULL;

	return merge(mergesort1(front), mergesort1(second));
}

/*
 * Merges two lists that are each sorted by word (strcmp order) into a
 * single sorted list and returns its head.
 *
 * front1, front2: heads of the input lists; either may be NULL.
 * When two words compare equal, the node from front2 is placed first.
 * Nodes are relinked in place; nothing is allocated or freed.
 *
 * Implemented as a loop so that stack use does not grow with list length.
 */
struct pal *merge(struct pal *front1, struct pal *front2) {
	struct pal *head = NULL;
	struct pal **link = &head;	/* where the next chosen node gets attached */

	while((front1 != NULL) && (front2 != NULL)){
		if(strcmp(front1->word, front2->word)<0){
			*link = front1;
			front1 = front1->next;
		}else{
			*link = front2;
			front2 = front2->next;
		}
		link = &(*link)->next;
	}

	/* One input is exhausted: attach whatever is left of the other. */
	*link = (front1 != NULL) ? front1 : front2;

	return head;
}

/*
 * Collapses runs of adjacent nodes that hold equal words into the first
 * node of the run, adding 1 to that node's count for each node removed.
 * Meant to be called on a list already sorted by word, so that equal
 * words are adjacent.
 *
 * front: head of the list; NULL is accepted and means "nothing to do".
 * Removed nodes are released with free().  Their word pointers are not
 * freed, because nodes in this program do not own the text they point to.
 */
void condense(struct pal *front){
	struct pal *back = front;

	if(back == NULL) return;	/* empty list */

	while(back->next != NULL){
		struct pal *curr = back->next;

		if(strcmp(back->word, curr->word) == 0){
			/* Duplicate: unlink it and stay on `back`, in case the
			 * node after it is a duplicate as well. */
			back->count++;
			back->next = curr->next;
			free(curr);
		}else{
			back = curr;
		}
	}
}

When you are reading from the file the value of MAX is 101. But each line has less than 101 characters . So what you want to do is something of this sort

/*
 * Prints mytestfile.txt one line at a time, removing the newline that
 * fgets() leaves at the end of each line before printing it.
 * Returns 0 on success and -1 when the file cannot be opened.
 */
int
main()
{
        FILE *fp = fopen("mytestfile.txt","r"); // Open the file
        char arr[100];                          // Holds one line (up to 99 characters)
        char *p;

        if(fp == NULL)
        {
                printf("File does not exist");
                return -1;
        }

        while(fgets(arr,sizeof arr,fp)!=NULL)   // Read from the file
        {
                // fgets() keeps the '\n' only when the whole line fitted.
                // The last line of a file, or a line longer than the buffer,
                // has none, so the result of strchr() must be checked.
                p = strchr(arr,'\n');
                if(p != NULL)
                        *p = '\0';              // Replace the \n by \0
                printf("%s\n",arr);
                // No need to clear arr: the next fgets() overwrites it.
        }

        fclose(fp);
        return 0;
}

The first problem I found is the while loop in main(). It has some nice bits of logic, but the overall program flow is flawed.


This is your while loop, with just a few changes:
1) The newline char is now removed from line[], right away
2) The FILE pointer you called "file", I changed immediately to "fp". Although C is case sensitive, I won't put up with that kind of name confusion.
3) I added the int variable j, and moved i and j declarations, to the top of the function.

The problem is that word[] was being sent to isPalindrome(), and word[] had only one valid letter in it.

while((fgets(line,MAX,fp))){					//for some reason, this produces
		//int i;										//line of second mixed with first
		printf("%s\n",line);
		fflush(stdout);

		//remove the newline char right away, once per line, before scanning.
		//Doing this inside the for loop would erase the very '\n' that loop
		//was waiting for and let i run past the end of line[].
		j = strlen(line);
		if(j > 0 && line[j-1]=='\n')
			line[j-1]='\0';

		//scan up to the terminating '\0', which fgets() always stores
		for(i = 0; line[i] != '\0'; i++){
			c = line[i];  //why assign an int, a char from the array of line?

			if(isalpha(c)){
				word[wl] = c;
				wl++;
			}else{
				if(wl>0){
					word[wl] = '\0';
					printf("%s",word);			//for testing
					fflush(stdout);
					words++;
				}else{
					continue;					//if it's a single non-alpha
				}								//it's not a word
			}

			if(isPalindrome(word) == 0){ //not good here - word holds trash atm.
				new = (struct pal*)malloc(sizeof(struct pal));
				new->word = word;
				new->count = 1;
				if(front->count == -1){
					front = new;
				}else{
					new->next = front;
					front = new;
				}
			}

			wl = 0; //each letter then gets stuffed into word[0]
		}
	}

So isPalindrome() is being called WAY too soon. Maybe you wanted to send the line[] array, I'm not sure, but word[] has 1 letter, and the rest is trash contents.

Then wl (word length I guess), is being reset to 0, so every letter goes into only word[0]. Move wl=0, to where it should be, in your logic.

P.S. it would be nice if you set your output for unbuffered. Now you wouldn't need all that fflush(stdout) business.

commented: Great Advice! +1

@Adak: how would i set the output for unbuffered? and thanks for the great tips!

ok, so i redid the code (and it looks prettier), the only problem i'm running into is that in case 3, the 'word' created includes all characters, not just individual words. it successfully counts the words and lines, but i'm having trouble putting those words into the struct i made. any suggestions?

case: 4 will be the same as case: 3, only printing the count next to each word (which is achieved in condense), so the only thing i need to fix is case: 3 which reads all the words and extracts palindromes

/*
 * wordstat.c
 *
 *  Created on: Oct 5, 2010
 *      Author: charlieg
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MAX 100				/*max number of char per line*/

/* One node of a singly linked list of words, each with an occurrence count. */
struct pal{
	char *word;			/* text of the word; the struct itself does not say who owns this memory */
	short int count;	/* occurrences; main() stores -1 here to mark a head node that holds no word yet */
	struct pal *next;	/* following node; the list walkers in this file stop when this is NULL */
};

/* Returns 0 when c reads the same forwards and backwards, 1 otherwise. */
int isPalindrome(char *c);
/* Merge-sorts the list by word (strcmp order) and returns the new head. */
struct pal *mergesort1(struct pal *front);
/* Merges two word-sorted lists into one and returns its head. */
struct pal *merge(struct pal *front1, struct pal *front2);
/* Folds adjacent nodes holding equal words into one node and bumps its count. */
void condense(struct pal *front);

/*
 * Entry point of the reworked program: argv[1] selects the mode, argv[2]
 * names the input file.  The option is mapped to a number x and a switch
 * runs the matching branch:
 *   0 (-h)  print the usage text
 *   1 (-l)  count '\n' characters in the file
 *   2 (-w)  count runs of alphabetic characters
 *   3 (-p)  collect palindromes into a list and print them
 *   4 (-pf) currently prints isPalindrome() results for four fixed strings
 *
 * NOTE(review): this listing is kept exactly as posted because the thread
 * discusses it.  The defects visible in it are flagged inline.
 */
int main(int argc, char* argv[]){
	FILE *fp;
	/* NOTE(review): x has no initial value; see the option chain below. */
	int i, wl = 0, x, lines = 0, words = 0;
	/* NOTE(review): p receives the result of fgetc() and is compared with EOF
	 * further down.  fgetc() returns an int; held in a char, EOF either can
	 * never match (unsigned char) or collides with a real byte value (signed
	 * char). */
	char c, p;
	char line[MAX], word[MAX];

	/* Placeholder head node: count == -1 means "no palindrome stored yet".
	 * NOTE(review): the malloc result is not checked, and front->word and
	 * front->next are left uninitialised. */
	struct pal *front = (struct pal*)malloc(sizeof(struct pal));
	struct pal *new, *ptr;


	front->count = -1;

	/* NOTE(review): argv[1] and argv[2] are read without looking at argc.
	 * When no option matches, x stays uninitialised, so reaching the
	 * `default:` label of the switch is not guaranteed. */
	if(strcmp("-h",argv[1])==0) x = 0;
	else if (strcmp("-l",argv[1])==0) x = 1;
	else if (strcmp("-w",argv[1])==0) x = 2;
	else if (strcmp("-p",argv[1])==0) x = 3;
	else if (strcmp("-pf",argv[1])==0) x = 4;

	/* The file is opened for every mode, including -h. */
	fp = fopen(argv[2], "r");

	if(fp == NULL){
		fprintf(stderr, "%s is not a readable .txt file\n",argv[2]);
		exit(-2);
	}


	switch(x){
	case 0:
		printf("Usage: ./wordstat <option> <filename>\nOptions:\n"
					"%40s\n%40s\n%44s\n%44s\n", "-l: number of lines",
					"-w: number of words","-p: palindrome's statistics", "-pf: palindrome's frequency");

		/* NOTE(review): returns without fclose(fp). */
		return 0;
	case 1:						/* took forever, but the text must be typed, otherwise the \n isn't added */
		/* Counts '\n' characters, so a final line without one is not counted. */
		do{
			p = fgetc(fp);
			if(p == '\n') lines++;
		}while(p != EOF);

		fclose(fp);

		printf("%d\n", lines);

		return 0;
	case 2:
		/* A word is counted when a non-letter follows at least one letter.
		 * wl carries over from one fgets() chunk to the next. */
		while(fgets(line,MAX,fp) != NULL){
			for(i = 0; i<strlen(line);i++){
				c = line[i];
				if(isalpha(c)){
					wl++;
				}else{
					if(wl>0){
						words++;
						wl = 0;
						continue;
					}else continue;
				}
			}
		}

		fclose(fp);

		printf("%d\n", words);

		return 0;
	case 3:
		/* NOTE(review): nothing in this branch reads from fp, so line[] is
		 * still uninitialised when strlen(line) is evaluated. */
		for(i = 0; i<strlen(line);i++){
			c = line[i];

			if(isalpha(c)){
				/* Words are stored upper-cased, making the palindrome test
				 * case-insensitive. */
				word[wl] = toupper(c);
				wl++;
			}else{
				if(wl>0){
					word[wl] = '\0';
					wl = 0;

					printf("%s",word);

					if(isPalindrome(word) == 0){
						/* NOTE(review): both paths store the address of the
						 * shared word[] buffer, not a copy.  Every node points
						 * at the same characters, which the memset() below
						 * then clears. */
						if(front->count == -1){
							front->count = 1;
							front->word = word;
						}else{
							new = (struct pal*)malloc(sizeof(struct pal));
							new->word = word;
							new->count = 1;
							new->next = front;
							front = new;
						}
					}
				}
				memset(word,0,sizeof(word));
			}
		}

		fclose(fp);

	/*	front = mergesort1(front);	*/	/*sort*/
	/*	condense(front);			*/	/*condense duplicates*/

		ptr = front;

		/* NOTE(review): the original head node's next was never set, so this
		 * walk has no guaranteed NULL to stop at.  If no palindrome was stored,
		 * front->word is uninitialised too. */
		while(ptr != NULL){
			printf("\t%s \n", ptr->word);
			ptr = ptr->next;
		}

		return 0;
	case 4:
		/* Prints isPalindrome() for four fixed strings; fp is not closed. */
		printf("%d\n", isPalindrome("madam"));
		printf("%d\n", isPalindrome("madam "));
		printf("%d\n", isPalindrome("rawr"));
		printf("%d\n", isPalindrome("racecar"));

		return 0;
	default:
		fprintf(stderr, "%s is not a valid option\ntype './wordstat -h <input file> for help\n\n",argv[1]);
		exit (-1);
	}
	return 0;
}

/********************************************************/

/*
 * Tells whether the string c reads the same in both directions.
 * The comparison is exact: case and spaces count.
 *
 * Returns 0 when c is a palindrome (an empty string qualifies) and
 * 1 when it is not.  Note the sense: 0 means "yes".
 */
int isPalindrome(char *c)
{
   int left;
   int right;

   /* Walk inwards from both ends until the two indices cross. */
   for (left = 0, right = strlen(c) - 1; left <= right; left++, right--) {
      if (c[left] != c[right])
         return 1;
   }

   return 0;
}

/********************************************************/

/*
 * Sorts a singly linked list of struct pal nodes by word (strcmp order)
 * with a top-down merge sort.
 *
 * front: head of the list, may be NULL.  Every node must have a valid
 *        word and a next pointer that is either a node or NULL.
 * Returns the head of the sorted list.  Nodes are relinked in place;
 * nothing is allocated or freed.
 */
struct pal *mergesort1(struct pal *front){
	struct pal *slow;
	struct pal *fast;
	struct pal *second;

	/* Zero or one node: already sorted. */
	if((front == NULL) || (front->next == NULL)) return front;

	/* Find the middle: `fast` moves two nodes for each node `slow` moves.
	 * `fast` must advance from its own position; recomputing it from `slow`
	 * moves it only one node per round and yields an (n-2, 2) split, which
	 * makes the sort quadratic and the recursion as deep as the list is long. */
	slow = front;
	fast = front->next;

	while((fast != NULL) && (fast->next != NULL)) {
		slow = slow->next;
		fast = fast->next->next;
	}

	/* Cut the list after `slow`; both halves are non-empty here. */
	second = slow->next;
	slow->next = NULL;

	return merge(mergesort1(front), mergesort1(second));
}

/*
 * Merges two lists that are each sorted by word (strcmp order) into a
 * single sorted list and returns its head.
 *
 * front1, front2: heads of the input lists; either may be NULL.
 * When two words compare equal, the node from front2 is placed first.
 * Nodes are relinked in place; nothing is allocated or freed.
 *
 * Implemented as a loop so that stack use does not grow with list length.
 */
struct pal *merge(struct pal *front1, struct pal *front2) {
	struct pal *head = NULL;
	struct pal **link = &head;	/* where the next chosen node gets attached */

	while((front1 != NULL) && (front2 != NULL)){
		if(strcmp(front1->word, front2->word)<0){
			*link = front1;
			front1 = front1->next;
		}else{
			*link = front2;
			front2 = front2->next;
		}
		link = &(*link)->next;
	}

	/* One input is exhausted: attach whatever is left of the other. */
	*link = (front1 != NULL) ? front1 : front2;

	return head;
}

/********************************************************/

/*
 * Collapses runs of adjacent nodes that hold equal words into the first
 * node of the run, adding 1 to that node's count for each node removed.
 * Meant to be called on a list already sorted by word, so that equal
 * words are adjacent.
 *
 * front: head of the list; NULL is accepted and means "nothing to do".
 * Removed nodes are released with free().  Their word pointers are not
 * freed, because nodes in this program do not own the text they point to.
 */
void condense(struct pal *front){
	struct pal *back = front;

	if(back == NULL) return;	/* empty list */

	while(back->next != NULL){
		struct pal *curr = back->next;

		if(strcmp(back->word, curr->word) == 0){
			/* Duplicate: unlink it and stay on `back`, in case the
			 * node after it is a duplicate as well. */
			back->count++;
			back->next = curr->next;
			free(curr);
		}else{
			back = curr;
		}
	}
}

Setting your buffering for stdout depends on:

1) Your OS. It may set it as buffered or unbuffered. Unbuffered is preferred.
2) Your compiler Should have an "environmental" variable for this

3) You may be able to explicitly set or override it by:
a) putting a \n at the end of each printf(" <something> \n");
b) using setbuf();

#include <stdio.h>

/* BUFSIZ is defined in stdio.h */
char outbuf[BUFSIZ];  //for setting buffered output only

/*
 * Demonstrates the two ways of calling setbuf() on stdout: handing it a
 * buffer of BUFSIZ bytes (buffered output) and handing it NULL (unbuffered
 * output).  Both calls are shown one after the other; the later one wins.
 *
 * NOTE(review): the C standard only promises that setbuf() works when it is
 * the first operation performed on the stream.  In a real program keep just
 * ONE of the two calls, placed before any output.
 */
int main(void)
{
   /* attach a buffer to the standard output stream */
   setbuf(stdout, outbuf);
   setbuf(stdout, NULL); //removes buffering from stdout

   /* Write some text.  With the NULL call above in effect it is not held
      back in outbuf.  puts() adds its own newline, so the "\n" in each
      string produces an extra blank line. */
   puts("This is a test of buffered output.\n\n");
   puts("This output will go into outbuf\n");
   puts("and won't appear until the buffer\n");
   puts("fills up or we flush the stream.\n");

   /* flush the output buffer (only matters when stdout is buffered) */
   fflush(stdout);

   return 0;
}

The above shows how to set, reset, and test buffering for stdout, for dos & Windows to XP.

If you're using Linux, ask in your Linux distro's forum or the general Linux forum. They're quite knowledgeable.

i figured out where my program is going wrong, and i can't figure out why.

in case 3, it checks correctly if it's a palindrome or not and makes the new struct just fine, but when i go to add it to the beginning of the list, it overrides whatever was already in the front

/* Excerpt from the word-scanning loop: c is the current input character,
 * word[] the buffer for the word being assembled and wl its length so far. */
if(isalpha(c)){
					/* Letter: store it upper-cased and keep collecting. */
					word[wl] = toupper(c);
					wl++;
				}else{
					if(wl>0){
						/* A non-letter ends the current word. */
						word[wl] = '\0';
						wl = 0;

						/* isPalindrome() returns 0 for a palindrome. */
						if(isPalindrome(word) == 0){
							if(front->count == -1){
								/* First palindrome: reuse the placeholder head node. */
								front->count = 1;
								front->w = word;
							}else{
								/* Later palindromes: push a new node at the head.
								 * NOTE(review): w receives the address of the shared
								 * word[] buffer, not a copy, so every node shows
								 * whatever word[] currently holds. */
								new = (struct pal*)malloc(sizeof(struct pal));
								new->w = word;
								new->count = 1;
								new->next = front;
								front = new;

								/* Debug output.  NOTE(review): the second line
								 * prints front->count next to front->next->w. */
								printf("%s\t%d\n",front->w,front->count);
								printf("%s\t%d",front->next->w,front->count);
							}
						}
					}
					/* Clears the shared buffer, and with it the text every
					 * node's w points at. */
					memset(word,0,sizeof(word));
				}

the problem was i was never taught strdup(), and that's what i needed to copy the strings in the structs.

thanks for help.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.