0

Can anyone help me with external direct merge sorting?

Here is my code

#include<iostream>
#include<conio.h>
#include<stdio.h>
using namespace std;

int GetKey(char infoLine[30]) //Getting sorting key
{
	char temps[30];
	strcpy(temps,infoLine);
	return atoi(strtok(temps," ")); //returns first number in a line
}

void DirectMerge(char fileName[20]) 
{
	FILE* FTS;    //file wich will be sorted
	FILE *A,*B;   //временные файлы
	long int numKeys=0;       //number of numbers in file
	char temp1[30],temp2[30]; //current numbers from each of temporary files
	FTS=fopen(fileName,"rt");
	while(!feof(FTS)) //getting number of numbers in file
	 {
		 fgets(temp1,30,FTS);
		 numKeys++;
	 }
	fclose(FTS);
	long int iterator=1; //number of numbers in one sequence
	while(iterator<numKeys) //while length of sequence is smaller than number of numbers is file
	 {
		 FTS=fopen(fileName,"rt");
		 A=fopen("a.txt","wt");
		 B=fopen("b.txt","wt");
		 for(int i=0;i<numKeys/2;i++) //reading half of numbers to the first temp file
		  {
			  fgets(temp1,30,FTS);
			  fputs(temp1,A);
		  }
		 if((numKeys/2)%iterator!=0) //read more numbers if number of previously read numbers doesn't contain integer number of sequences 

		  {
			  int tmp=numKeys/2;
			  while(tmp%iterator!=0)
			   {
				   fgets(temp1,30,FTS);
				   fputs(temp1,A);
				   tmp++;
			   }
		  }
		 while(!feof(FTS)) //all other numbers write to second temp file
		  {
			  fgets(temp2,30,FTS);
			  fputs(temp2,B);
		  }
		 fclose(FTS);
		 fclose(A);
		 fclose(B);
         //phase 2 -merging temp files
		 FTS=fopen(fileName,"wt");             
		 A=fopen("a.txt","rt");
		 B=fopen("b.txt","rt");
		 fgets(temp1,30,A); 
                 fgets(temp2,30,B); 
		 bool sh1=1,sh2=1;
		 bool nl1=0,nl2=0;
		 if(feof(A)) nl1=1;
		 if(feof(B)) nl2=1;
		 bool once=1;
		 while((!feof(A) && !feof(B)) || once)
		  {
			  int iterA=0,iterB=0; //number of read numbers on current iteration from the first and the second files
			  bool iterCon=1; //if we need to continue iterations
			  while((iterCon && !feof(A) && !feof(B)) || (once && iterCon))
			   {
				   if(GetKey(temp1)<=GetKey(temp2)) //compare keys
				    {
						fputs(temp1,FTS); //write to the file
						if(nl1) fputc('\n',FTS);
						sh1=1;
						if(fgets(temp1,30,A)!=NULL) sh1=0; else once=0;
						iterA++; //increase number of numbers read from the first file
				    } else
				    {
						fputs(temp2,FTS);
						if(nl2) fputc('\n',FTS);
						sh2=1;
						if(fgets(temp2,30,B)!=NULL) sh2=0; else once=0;
						iterB++;
					}
					if(iterA==iterator || iterB==iterator) iterCon=0; //if  in one of the files number of read numbers is equal to the length of the sequence on this iteration then stop merging on this iteration
			   }                      


			  if(!iterCon) //if in one of the files has unfinished sequence

			   {
				   while(iterA<iterator)
				    {
						fputs(temp1,FTS);
						sh1=1;
						if(fgets(temp1,30,A)!=NULL) sh1=0; else break;
						iterA++;
				    }
				   while(iterB<iterator)
				    {
						fputs(temp2,FTS);
						sh2=1;
						if(fgets(temp2,30,B)!=NULL) sh2=0; else break;
						iterB++;
				    }
			   }
		  }
		 while(!feof(A)) //if one of files was larger than another than write it's ending
		  {
			  fputs(temp1,FTS);
			  sh1=1;
              if(fgets(temp1,30,A)!=NULL) sh1=0;
		  }
		 while(!feof(B))
		  {
			  fputs(temp2,FTS);
			  sh2=1;
			  if(fgets(temp2,30,B)!=NULL) sh2=0;  
		  }
		 if(!sh1) fputs(temp1,FTS); //if line was read but was not written into result file because of the end of file
		 if(!sh2) fputs(temp2,FTS);
		 fclose(FTS);  //close files
		 fclose(A);
		 fclose(B);
		 iterator*=2; //increase length of sequence
	 }
	remove("a.txt");
	remove("b.txt");
	ShowFile(fileName);
}

int main()
{
	char namer[20];
	cin>>namer;
	DirectMerge(namer);
}

Actually it's a translation of pascal code

var
s:string;
t:text;

Procedure MergeSort(name: string; var f: text);
          Var a1,a2,s,i,j,kol,tmp: integer;
              f1,f2: text;
              b: boolean;
          Begin
             kol:=0;

             Assign(f,name);
             Reset(f);
             While not EOF(f) do
               begin
                 read(f,a1);
                 inc(kol);
               End;
             Close(f);

             Assign(f1,'{имя 1-го вспомогательного файла}.txt');
             Assign(f2,'{имя 2-го вспомогательного файла}.txt');

             s:=1;
             While (s<kol) do
               begin

                 Reset(f); Rewrite(f1); Rewrite(f2);
                 For i:=1 to kol div 2 do
                   begin
                     Read(f,a1);
                     Write(f1,a1,' ');
                   End;
                 If (kol div 2) mod s<>0 then
                   begin
                     tmp:=kol div 2;
                     While tmp mod s<>0 do
                       begin
                         Read(f,a1);
                         Write(f1,a1,' ');
                         inc(tmp);
                       End;
                   End;
                 While not EOF(f) do
                   begin
                     Read(f,a2);
                     Write(f2,a2,' ');
                   End;
                 Close(f); Close(f1); Close(f2);


                 Rewrite(f); Reset(f1); Reset(f2);
                 Read(f1,a1);
                 Read(f2,a2);
                 While (not EOF(f1)) and (not EOF(f2)) do
                   begin
                     i:=0; j:=0;
                     b:=true;
                     While (b) and (not EOF(f1)) and (not EOF(f2)) do
                       begin
                         If (a1<a2) then
                           begin
                             Write(f,a1,' ');
                             Read(f1,a1);
                             inc(i);
                           End
                         else
                           begin
                             Write(f,a2,' ');
                             Read(f2,a2);
                             inc(j);
                           End;
                         If (i=s) or (j=s) then b:=false;
                       End;
                     If not b then
                       begin
                         While (i<s) and (not EOF(f1)) do
                           begin
                             Write(f,a1,' ');
                             Read(f1,a1);
                             inc(i);
                           End;
                         While (j<s) and (not EOF(f2)) do
                           begin
                             Write(f,a2,' ');
                             Read(f2,a2);
                             inc(j);
                           End;
                       End;
                   End;
                 While not EOF(f1) do
                   begin
                     tmp:=a1;
                     Read(f1,a1);
                     If not EOF(f1) then
                       Write(f,tmp,' ')
                     else
                       Write(f,tmp);
                   End;
                 While not EOF(f2) do
                   begin
                     tmp:=a2;
                     Read(f2,a2);
                     If not EOF(f2) then
                       Write(f,tmp,' ')
                     else
                       Write(f,tmp);
                   End;
                 Close(f); Close(f1); Close(f2);

                 s:=s*2;
                 readln;
               End;
             Erase(f1);
             Erase(f2);
          End;

Begin
MergeSort('input.txt',t);
end.

but pascal program sorts file with numbers in one line and I need a program that sorts lines with sort keys in the beginning of the line.The main problem is that Pascal EOF and C++ feof work different so I added a lot checks but finally there is still something wrong about the code.
For example for input file

1 крутота
3 огог
2 зомби
5 ломби
7 йохохо
6 трулаа
1
1
3
2
11
27
200 токены
156
1 шмокены
24
56 лоло

the output is

1 крутота
1
1 шмокены
1
2 зомби
2
3
3 огог
5 ломби
6 трулаа
7 йохохо
11
24
27
27
56 лоло156
200 токены

were 156 gets into the end of line "56 лоло" and there are duplicate 27 numbers .It seems that end of line symbol was lost somewhere but I can't find were.

2
Contributors
1
Reply
5
Views
6 Years
Discussion Span
Last Post by nezachem
0

Pascal and C deetct end-of-file in a different way. Not getting into details, a while(!feof()) idiom is unsafe in C. A correct way to read a file is while(fgets() != 0)

This topic has been dead for over six months. Start a new discussion instead.
Have something to contribute to this discussion? Please be thoughtful, detailed and courteous, and be sure to adhere to our posting rules.