data printout (continually) into many files

Question

idgeza 0 Newbie Poster

15 Years Ago

Hi all,

I have the following code below, where I generate a matrix and lots of data, and finally print that data out into one .dat file.
Now I would like to split this data across several files, e.g. 4 .dat files, written cumulatively.

To see what I am talking about, check my code in the attachment!

I would like to have 4 .dat files. For example, if I generate N=100 numbers, then data1.dat should contain the first 25 entries, data2.dat the first 50 entries, data3.dat the first 75 entries, and data4.dat all 100 entries.

In the "for" loop I originally have datavecA.dat and datavecB.dat. Instead of these I would now like to have four files of each kind, i.e. datavecA_1.dat, datavecB_1.dat; datavecA_2.dat, datavecB_2.dat; ...etc.

I think I should use if-else if statement, but I am not sure about that... :(

Thank you in advance!

c++

daniweb.cpp (4.21 KB)

#include <cstdlib>  
#include <fstream>
#include <iostream>
#include <ctime>
#include <cmath>
using namespace std;

const float M_2PIf = 6.283185307179586476925286766559f;
const float M_PIf  = 3.141592653589793238462643383279f;

// Plain aggregate holding three float components. The members are declared
// in the order x, y, z, so a brace initialiser such as {0.0, 0.0, 1.0}
// fills them in exactly that order.
struct vector
{
  float x;
  float y;
  float z;
};

// Dot (scalar) product of two 3-component vectors:
// left.x*right.x + left.y*right.y + left.z*right.z.
inline float function_vec(const vector &left, const vector &right)
{
	return left.x * right.x
	     + left.y * right.y
	     + left.z * right.z;
}

class Matrix3x3 {

private:

 	float tomb[3][3];  // 3x3 matrix

public:
        void init(vector &a){ 

	tomb[0][0] = a.x * a.x;
	tomb[0][1] = a.x * a.y;
	tomb[0][2] = a.x * a.z;
	tomb[1][0] = a.y * a.x;
	tomb[1][1] = a.y * a.y;
	tomb[1][2] = a.y * a.z;
	tomb[2][0] = a.z * a.x;
	tomb[2][1] = a.z * a.y;
	tomb[2][2] = a.z * a.z;
    	}

 Matrix3x3 &operator +=(Matrix3x3 b){
 int k,l;
 for(k=0; k<3; k++)
 for(l=0; l<3; l++) 
  tomb[k][l] += b.tomb[k][l];
 return *this;
 }

 Matrix3x3 &operator/=(float c){
 int k,l;
 for(k=0; k<3; k++)
 for(l=0; l<3; l++)
  tomb[k][l] /= c;
 return *this;
 }

 float element(int i,int j) const {
   return tomb[i][j];
 }

};

// Streams a Matrix3x3 as a bracketed block: "[" followed by a newline, then
// one line per row in which every element is preceded and followed by a tab,
// then "]" (no trailing newline). Returns the stream to allow chaining.
ostream& operator << (ostream& output, const Matrix3x3& s){
	output << "[\n";

	// Walk the nine elements in row-major order; a row ends after every
	// third element.
	for (int k = 0; k < 9; ++k) {
		const int row = k / 3;
		const int col = k % 3;
		output << "\t" << s.element(row, col) << "\t";
		if (col == 2)
			output << "\n";
	}

	output << "]";
	return output;
}

// Function prototypes
float rand(float min, float max); 
float vec_angle(const vector &left, const vector &right);
vector function(float m[][3], vector v);
// main function
int main(void) {
	int N = 100;
	float epsilon = 1.0;
        float cosangle[N];
	    //  >>>>>>>>> HISTOGRAMM <<<<<<<<<<<<
	double hmin = 0.0;
	double hmax = 1.0;
	int hbins = 10;
	double* h;
	h = new double[hbins];
	for ( int bin = 0 ; bin<hbins ; bin++ ) h[bin] = 0.0;
	   //  >>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<

	Matrix3x3 m[N];
	vector vec = {0.0,0.0,1.0};
    	vector vec0 = {0.0,0.0,1.0};
	int i, j;

	srand((unsigned)time(NULL));

	ofstream ofile;
        ofile.open("datavecA.dat");

	for (i = 0; i < N; i++) 
	{ 

	    //  >>>>>>>>> ________________ <<<<<<<<<<<

    	float mX[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };
    	float mY[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };
    	float mZ[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };

	    //  >>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<

            switch (rand()%3) 
	    { 
        	case 0:
    		vec = function(mX, vec);
            	break;
        	case 1:
    		vec = function(mY, vec);
            	break;
        	case 2:
    		vec = function(mZ, vec);
            	break;
            }

        cosangle[i] = vec_angle(vec0,vec);
	 
	ofile << cosangle[i] << endl;
		
		int bin=(int) ((cosangle[i]-hmin)/(hmax-hmin)*hbins);
		if ( bin<0 ) bin=0;
		if ( bin>hbins-1 ) bin=hbins-1;
		h[bin]++;

	        m[i].init(vec);
    	 }  
	ofile.close();

		ofstream file;
		file.open("datavecB.dat");
		file << "#position\tvalue\t\thalf-binwidth\terror" << endl;
		for ( int bin=0 ; bin<hbins ; bin++ )
		{
		double X=hmin+(bin+0.5)*(hmax-hmin)/hbins;
		double dX=0.5*(hmax-hmin)/hbins;
		double Y=h[bin];
		double dY=sqrt(h[bin]);
		file << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
		}
		file.close();

		delete[] h;
/****************************************************/
    	Matrix3x3 sum = m[0]; 

    	for (i=0;i<N;i++) {
	sum += m[i];
    	}
	sum /= N;
	
	cout << sum << endl;

return 0;
}

// Overload of rand() that maps one value of the standard rand() linearly
// from [0, RAND_MAX] onto [min, max].
float rand(float min, float max)
{
	const float span = max - min;
	const float offset = span * rand() / static_cast<float>(RAND_MAX);
	return min + offset;
}

// Multiplies the 3x3 matrix m by the column vector v and returns the
// product; each result component is the dot product of one matrix row
// with (v.x, v.y, v.z).
vector function(float m[][3], vector v)
{
    const float *top    = m[0];
    const float *middle = m[1];
    const float *bottom = m[2];

    vector product = {
        top[0]*v.x    + top[1]*v.y    + top[2]*v.z,
        middle[0]*v.x + middle[1]*v.y + middle[2]*v.z,
        bottom[0]*v.x + bottom[1]*v.y + bottom[2]*v.z
    };

    return product;
}

// Returns acos of the dot product of the two vectors, in radians. The
// inputs are not normalised here, so the result is the angle between them
// only when both have unit length.
//
// Rounding can push the dot product of two unit vectors slightly outside
// [-1, 1], where acosf() has no real result. The value is therefore clamped
// to that interval with its sign preserved, so parallel vectors give 0 and
// anti-parallel vectors give pi.
float vec_angle(const vector &left, const vector &right)
{
	float dot = function_vec(left, right);
	if (dot > 1.0f)  dot = 1.0f;
	if (dot < -1.0f) dot = -1.0f;
	return acosf(dot);
}

2 Contributors
25 Replies
120 Views
5 Days Discussion Span
Latest Post 15 Years Ago Latest Post by Murtan

All 25 Replies

Murtan 317 Practically a Master Poster

15 Years Ago

200 lines of code is not unreasonable to post, you should just have posted it in code tags. (There are lot of people that won't download code. I don't do it very often.)

The "vecA" files would be fairly straightforward to do after the loop that does the calculations, since that is just printing elements from the cosangle array. I would wrap a four-pass for loop around code that outputs 1/4th of the data at a time.

The "vecB" file only have 10 lines at present is there any point to splitting them into 4 files? If so, you could do it in a similar fashion to what I suggested above.

If you have any trouble with the above, make an effort at writing it and post the code (in code tags) along with a description of the problem and I'll help you figure it out.

When posting c++ code, please use c++ code tags

Edited 12 Years Ago by happygeek because: fixed formatting

Murtan 317 Practically a Master Poster

15 Years Ago

I understood what you said you wanted to do.

Apparently you didn't understand what I was recommending.

Right now you have something like this:

ofstream ofile;
        ofile.open("datavecA.dat");

	for (i = 0; i < N; i++) 
	{ 
		// I skipped some of your stuff here...
		cosangle[i] = vec_angle(vec0,vec);
	 
		ofile << cosangle[i] << endl;

		// histogram data calculated from all 100 entries into 10 bins		
		int bin=(int) ((cosangle[i]-hmin)/(hmax-hmin)*hbins);
		if ( bin<0 ) bin=0;
		if ( bin>hbins-1 ) bin=hbins-1;
		h[bin]++;

		// I skipped another line or two here...
    	 }  
	ofile.close();

Would you agree that the file would have the same contents if the above code was written as:

for (i = 0; i < N; i++) 
	{ 
		// I skipped some of your stuff here...
		cosangle[i] = vec_angle(vec0,vec);
	 
		// histogram data calculated from all 100 entries into 10 bins		
		int bin=(int) ((cosangle[i]-hmin)/(hmax-hmin)*hbins);
		if ( bin<0 ) bin=0;
		if ( bin>hbins-1 ) bin=hbins-1;
		h[bin]++;

		// I skipped another line or two here...
    	 }  

	ofstream ofile;
        ofile.open("datavecA.dat");
	for (i = 0; i < N; i++) 
		ofile << cosangle[i] << endl;
	ofile.close();

Now that last file write could be broken up into multiple pieces (as many as you care to) fairly easily without having to 'muck up' the main loop. (Note however that each N only adds one line with a single number -- 100 lines is still a pretty small file.)

The histogram data really can't be written at all until the main loop is complete either, and it doesn't have the same N as the main loop and the cosangle stuff. You currently write it like this:

ofstream file;
		file.open("datavecB.dat");
		file << "#position\tvalue\t\thalf-binwidth\terror" << endl;
		for ( int bin=0 ; bin<hbins ; bin++ )
		{
		double X=hmin+(bin+0.5)*(hmax-hmin)/hbins;
		double dX=0.5*(hmax-hmin)/hbins;
		double Y=h[bin];
		double dY=sqrt(h[bin]);
		file << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
		}
		file.close();

Which would also support being broken up if you found that advantageous. (Note that in your current code, hbins is 10 so there should only be 10 lines in this file.)

There are a couple of sub-calculations that you repeat that might best be done once and kept in a temporary. In fact the dX calculation is constant if performed outside the for loop (as it doesn't reference the loop index).

Murtan 317 Practically a Master Poster

15 Years Ago

I was thinking something like this, but I would much rather have seen you try something than just give it to you. Demonstrate effort, you get lots more help.

// Sketch: write entries [0, N) into `numberofparts` consecutive files.
// N and numberofparts are expected to be defined by the surrounding code.
// nextindex is declared outside the outer loop so that each part continues
// where the previous one stopped.
int nextindex = 0;
for (int ii = 1; ii <= numberofparts; ii++)
{
   // build the filename to open
   // open the file here
   // One past the last entry belonging to part ii; for the final part
   // (ii == numberofparts) this evaluates to N.
   int part_end_index = (ii * N) / numberofparts;
   for (; nextindex < part_end_index; nextindex++)
   {
      // output the data for [nextindex]
   }
   // close the file
}

Murtan 317 Practically a Master Poster

15 Years Ago

I thought I showed you what I recommended.

I thought I showed you that you could take the output out of the big loop.

The goal would be not to repeat the same code for as many times as you have data files.

If you insist on doing it that way, at least try to turn some of it into a function.

You can't output the histogram data until you have processed all of the data in the main loop.

You can't write part of the histogram data out if you've only processed part of the data.

If you're going to open and close the file for each iteration, you're going to have to open the file in append mode to add to the end of the file, but then somewhere you have to either delete the file before you output, or open the file normally to clear it out.

Murtan 317 Practically a Master Poster

15 Years Ago

First, let me repeat again, at the sizes you are currently configured for, breaking the output up into multiple files is unwarranted.

If we're only at the testing level now (with N at 100 and hbins at 10) and the numbers are going to get a lot larger then it might be worth the effort.

// Sketch: write entries [0, N) into `numberofparts` consecutive files.
// N and numberofparts are expected to be defined by the surrounding code.
// nextindex is declared outside the outer loop so that each part continues
// where the previous one stopped.
int nextindex = 0;
for (int ii = 1; ii <= numberofparts; ii++)
{
   // build the filename to open
   // open the file here
   // One past the last entry belonging to part ii; for the final part
   // (ii == numberofparts) this evaluates to N.
   int part_end_index = (ii * N) / numberofparts;
   for (; nextindex < part_end_index; nextindex++)
   {
      // output the data for [nextindex]
   }
   // close the file
}

The above code (which has a few pieces for you to work on still) was intended to replace the output routine that I had following the main loop (in the example where I moved the output code from around and inside the big loop).

This code was only intended to handle the vecA part of the output, but if you need to break up the vecB files (the data histogram) you could apply a similar routine, adding the output of the file header just after the point where the file is opened.

I'm not sure what the intent of the vecC file is, but you appear to be only writing one number in it.

Reply to this topic

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.

idgeza 0 Newbie Poster · Answer 1 · 2009-12-11T14:25:34+00:00

Hello, ok, well I write the main() function of my code:

// main function
int main(void) {

	int N = 100;
	float epsilon = 1.0;
        float cosangle[N];
	    //  >>>>>>>>> HISTOGRAMM <<<<<<<<<<<<
	double hmin = 0.0;
	double hmax = 1.0;
	int hbins = 10;
	double* h;
	h = new double[hbins];
	for ( int bin = 0 ; bin<hbins ; bin++ ) h[bin] = 0.0;
	   //  >>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<

	Matrix3x3 m[N];
	vector vec = {0.0,0.0,1.0};
    	vector vec0 = {0.0,0.0,1.0};
	int i, j;

	srand((unsigned)time(NULL));

	ofstream ofile;
        ofile.open("datavecA.dat");

	for (i = 0; i < N; i++) 
	{ 

	    //  >>>>>>>>> ________________ <<<<<<<<<<<

    	float mX[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };
    	float mY[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };
    	float mZ[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };

	    //  >>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<

            switch (rand()%3) 
	    { 
        	case 0:
    		vec = function(mX, vec);
            	break;
        	case 1:
    		vec = function(mY, vec);
            	break;
        	case 2:
    		vec = function(mZ, vec);
            	break;
            }

        cosangle[i] = vec_angle(vec0,vec);
	 
	ofile << cosangle[i] << endl;
		
		int bin=(int) ((cosangle[i]-hmin)/(hmax-hmin)*hbins);
		if ( bin<0 ) bin=0;
		if ( bin>hbins-1 ) bin=hbins-1;
		h[bin]++;

	        m[i].init(vec);
    	 }  
	ofile.close();

		ofstream file;
		file.open("datavecB.dat");
		file << "#position\tvalue\t\thalf-binwidth\terror" << endl;
		for ( int bin=0 ; bin<hbins ; bin++ )
		{
		double X=hmin+(bin+0.5)*(hmax-hmin)/hbins;
		double dX=0.5*(hmax-hmin)/hbins;
		double Y=h[bin];
		double dY=sqrt(h[bin]);
		file << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
		}
		file.close();

		delete[] h;
/****************************************************/
    	Matrix3x3 sum = m[0]; 

    	for (i=0;i<N;i++) {
	sum += m[i];
    	}
	sum /= N;
	
	cout << sum << endl;

return 0;
}

Now in the big for() loop I would like to cut the generated data according to the number of iterations N.

for (i = 0; i < N; i++) 
	{ 

	    //  >>>>>>>>> ________________ <<<<<<<<<<<

    	float mX[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };
    	float mY[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };
    	float mZ[3][3] = {
                             {1.0f, 0.0f, 0.0f},
                             {0.0f, 1.0f, 0.0f},
                             {0.0f, 0.0f, 1.0f}
                             };

	    //  >>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<

            switch (rand()%3) 
	    { 
        	case 0:
    		vec = function(mX, vec);
            	break;
        	case 1:
    		vec = function(mY, vec);
            	break;
        	case 2:
    		vec = function(mZ, vec);
            	break;
            }

		// Here I would like to get cosangle, sum,... into different files

		// I think I should have something like this

		if ( i>=0 && i<10 ){
			// cosangle[i], sum, ... so datavecA.dat, datavecB.dat -> datavecA_1.dat, datavecB_1.dat
		} 
		else if ( i>=10 && i<20 ){
			// cosangle[i], sum, ... -> datavecA_2.dat, datavecB_2.dat
		}
		else if ( i>=20 && i<30 ){
			// cosangle[i], sum, ... -> datavecA_3.dat, datavecB_3.dat
		}
		else if ( i>=30 && i<40 ){
			// cosangle[i], sum, ... -> datavecA_4.dat, datavecB_4.dat
		}
		
		...		

		else {
			// exit(1) or do nothing
		}

       }

So just split the original files into different files according to the number N of the for() loop.

idgeza 0 Newbie Poster · Answer 2 · 2009-12-11T15:51:16+00:00

Ok, I see what you are talking about, that's right. But is there any way to do that, namely split the files in the program (what about the if- else if statement?)?

I just want to collect the generated data separately into that files... Maybe I should use some dynamical arrays... ??

Please help me to solve this!

idgeza 0 Newbie Poster · Answer 3 · 2009-12-11T19:08:40+00:00

Ok, I would imagine something like this:

// Attempt at writing per-range output files from inside the main loop.
// NOTE(review): new double[hbins] does not zero the counters; h[bin]++ below
// then increments indeterminate values unless h is cleared first.
float* costheta = new float[N];
double *h = new double[hbins];
Matrix3x3 *m = new Matrix3x3[N];

	for(int i=0; i<N; ++i)
	{
                        // There are some stuff here...

			// NOTE(review): vec_cosangle is not defined in the code
			// posted earlier (that code has vec_angle) -- confirm the name.
			costheta[i] = vec_cosangle(vec0,vec);
	        	m[i].init(vec);
    			// will hold the sum of all of the matrices
    			// NOTE(review): declared inside the loop, so sum is
    			// re-created from m[0] on every iteration.
    			Matrix3x3 sum = m[0];

			if ( i >=0 && i<10){

				int bin=(int) ((costheta[i]-hmin)/(hmax-hmin)*hbins);
			        if ( bin<0 ) bin=0;
		                if ( bin>hbins-1 ) bin=hbins-1;
			        h[bin]++;

				// NOTE(review): the file is re-opened (and so truncated)
				// on every iteration with i < 10, and the inner loop
				// declares its own i that hides the outer one, so it
				// reads costheta[0..9] even when only part of those
				// entries has been computed.
		       	 	ofstream ofile;
				ofile.open("datavecA_1.dat");
				for (int i=0; i<10; ++i ){
				ofile << costheta[i] << endl;
				}
				ofile.close();

				// Histogram dump; rewritten on every iteration as well.
				ofstream file;
				file.open("datavecB_1.dat");
				for ( int bin=0 ; bin<hbins ; bin++ )
				{
				double X=hmin+(bin+0.5)*(hmax-hmin)/hbins;
				double dX=0.5*(hmax-hmin)/hbins;
				double Y=h[bin];
				double dY=sqrt(h[bin]);
				file << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
				}
				file.close();
				// NOTE(review): this loop reads m[0..9999]; m holds only
				// N elements, most of them not initialised yet at this
				// point -- out of bounds unless N >= 10000 (TODO confirm N).
				ofstream f;
				f.open("datavecC_1.dat");
				for(int i=0;i<10000;++i){				
				sum += m[i];
				}
				sum /= 10000.0f;
				f <<  sum << endl;
				f.close();
			}

                       // NOTE(review): i == 10 matches neither this range nor
                       // the one above (i<10 vs. i>10).
                       else if (i>10 && i<=20){
                       // the same as above, but with datavecA_2.dat, datavecB_2.dat, datavecC_2.dat
                       }

                       ... etc

             }

	delete[] costheta;
	delete[] h;
	delete[] m;

idgeza 0 Newbie Poster · Answer 4 · 2009-12-11T20:44:08+00:00

Yes, you are right and thank you, but can you post the corrected parts of the code (or a new one), because i was mixed up in something... :(

idgeza 0 Newbie Poster · Answer 5 · 2009-12-12T01:08:21+00:00

I tried to write something, but I get runtime error message, namely "Segmentation fault".
Anyway, what do you think, is this code correct or not? Thanks!

// some stuff here...               

                       // First half of the iterations goes into row 0 of the
                       // two-dimensional costheta / m arrays.
                       if ( i >=0 && i<50 ){

			costheta[0][i] = vec_cosangle(vec0,vec);
			//code here to make a matrix from vec[i]
	        	m[0][i].init(vec);

			int bin=(int) ((costheta[0][i]-hmin)/(hmax-hmin)*hbins);
			if ( bin<0 ) bin=0;
			if ( bin>hbins-1 ) bin=hbins-1;
			h[bin]++;

			}

			// NOTE(review): here i runs from 50 to 99 and is used
			// directly as the second index. If each row holds only 50
			// elements this writes past the end of the row (i - 50 would
			// be the in-row position) -- the declarations are not shown,
			// TODO confirm the array sizes.
			else if ( i >=50 && i<100 ){

			costheta[1][i] = vec_cosangle(vec0,vec);
			//code here to make a matrix from vec[i]
	        	m[1][i].init(vec);

			int bin=(int) ((costheta[1][i]-hmin)/(hmax-hmin)*hbins);
			if ( bin<0 ) bin=0;
			if ( bin>hbins-1 ) bin=hbins-1;
			h[bin]++;

			}

			else { 
			cout << "There is something wrong..." << endl;
			}

		
	}  // this is the end of the big loop through the N vectors

	// NOTE(review): h is released here but read again further down when the
	// histogram is written (Y=h[bin], sqrt(h[bin])) -- use after delete.
	delete[] h;

    				// will hold the sum of all of the matrices
    				// code to initialize the sum matrix to all zeros
    				// NOTE(review): this copies element [2][N] rather than
    				// producing zeros; that index looks out of range for
    				// the arrays used above -- TODO confirm dimensions.
    				Matrix3x3 sum = m[2][N];

				// Write cosine of the angle into file:
		       	 	ofstream ofile;
				ofile.open("datavec_A.dat");
				for (int i=0; i<50; ++i)
					ofile << costheta[0][i] << endl;
				ofile.close();

				// Write the histogram into file:
				ofstream file;
				file.open("datavec_B.dat");
				// Write a legend line:
				file << "#position\tvalue\t\thalf-binwidth\terror" << endl;
				for ( int bin=0 ; bin<hbins ; bin++ )
				{
				// Position of histogram bin:
				double X=hmin+(bin+0.5)*(hmax-hmin)/hbins;
				// Half-binwidth of histogram bin:
				double dX=0.5*(hmax-hmin)/hbins;
				// Entries in histogram bin:
				double Y=h[bin];
				// Error of entries in histogram bin:
				double dY=sqrt(h[bin]);
				// Write the bin into file:
				file << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
				}
				file.close();

				ofstream f;
				f.open("datavec_C.dat");
				// NOTE(review): the loop variable i is declared without
				// an initial value, so the starting index is
				// indeterminate.
				for (int i; i<50;++i){
				sum += m[0][i];
				}
				sum /= 50.0f;
				f << sum << endl;
				f.close();

/****************************************************/

/****************************************************/

Murtan 317 Practically a Master Poster · Answer 6 · 2009-12-12T03:31:30+00:00

You appear to be having an extremely difficult time following what I thought were simple directions. I am apparently failing to communicate.

Let's get back to basics first.

Questions -- please answer them all:

Your current code has N (the limit for the big loop) set at 1000, what do you expect the maximum value to be?

Your current code has hbins (the number of bins in the histogram) set at 10, what do you expect the maximum value to be?

In your first code, the file "datavecA.dat" contained 1000 lines of data. Each line of data consisted of a single value. The value was a double. If you break the file up into parts, what do you expect to be the maximum number of lines in each part?

In your first code, the file "datavecB.dat" contained a title line and 10 data lines. The ten data lines contained four double values, preceded by a space and separated by two tabs each. If you break the file up into parts, what do you expect to be the maximum number of lines in each part?

In your next-to-the-last code you appeared to be creating a "datavecC.dat" file that contained the single number that was the sum of 10,000 other values. If this file is to be included, what do you expect the maximum number of lines in this file to be?

Back to commentary

The above questions are intended to justify the effort to split the files at all as I expect the files are currently trivially small.

One of the points that you seem to be missing is that I have NEVER encouraged you to be writing to more than one file at a time. As the number of lines in each file varies so greatly.

I would expect that the histogram data (datavecB) will likely only need a single file. This file cannot even start writing until you have binned all the data. The only exception I can think of would be if you want an independent histogram for each datavecA file.

If your N was large enough that you couldn't keep all of the costheta in memory at once, I could support writing the values out from inside 'the big loop'. An example algorithm that would support this would look something like the following:

// Before the loop starts, we need to declare 2 strings (or character arrays) for the current and the last datavecA filename. We will pre-initialize these to the first filename.
string fnCurrentDva = "datavec_A1.dat";
string fnLastDva = fnCurrentDva;
// We need to declare the output stream we will write from inside the loop and open the file for output
ofstream ofDva(fnCurrentDva.c_str());
// We need to pre-initialize the filenames to the first filename

oops it posted before I was done...I'll post the code example in a bit -- I wasn't done writing it.

Murtan 317 Practically a Master Poster · Answer 7 · 2009-12-12T04:23:20+00:00

Ok, I was going to compare the strings, but I decided that was inefficient so I switched to integers.

// Fragment: writes one value per loop iteration and switches to a new
// numbered output file whenever the computed file index changes.
// I'm using strings
#include <string>
// I'm using stringstreams
#include <sstream>

// We need to declare the output stream we will write from inside the loop and open the file for output
ofstream ofDva("datavec_A1.dat");

// Two integers for the current datavecA index and the last datavecA index
int dvaIndex = 1;
int dvaIndexLast = dvaIndex;

// Now for the loop
for (i = 0; i < N; i++)
{
	// calculate the current dvaIndex from the loop index
	// change this formula as required to determine the maximum number of lines per file
	// or based on the number of parts you want.
	// Integer division: i = 0..199 gives 1, i = 200..399 gives 2, and so on,
	// i.e. 200 lines per file.
	dvaIndex = 1 + i / 200;
	if (dvaIndex != dvaIndexLast)
	{
		// Output anything necessary before we close the file
		// --- fill in anything you think is appropriate

		// close the file we're done with this part
		ofDva.close();

		// generate the new filename
		ostringstream tfn;
		tfn << "datavec_A" << dvaIndex << ".dat";
		// open the file
		ofDva.open(tfn.str().c_str());
	}
	// Remember the index so a switch happens only when it changes again.
	dvaIndexLast = dvaIndex;

	// The rest of the big loop goes here

	// including the output line
	ofDva << cosangle[i] << endl;

	// and the histogram binning lines
}

// we're done with the loop
// close the last file we had open
ofDva.close();

idgeza 0 Newbie Poster · Answer 8 · 2009-12-15T01:37:24+00:00

Hello,

that's ok, but if e.g. int i = 400, then I only get files containing 200 lines each. Instead of this I would like to get files containing 200 and 400 lines, i.e. I would like to write out the generated data cumulatively.

Could you give me some hint?

P.S. I should think that the if-else if statement would be the right, or not? What do you think about that?

Murtan 317 Practically a Master Poster · Answer 9 · 2009-12-15T05:14:54+00:00

I don't see any answers to the questions. I'll quote the questions again here, but I will not reply to this thread again unless you answer them:

Your current code has N (the limit for the big loop) set at 1000, what do you expect the maximum value to be?
Your current code has hbins (the number of bins in the histogram) set at 10, what do you expect the maximum value to be?
In your first code, the file "datavecA.dat" contained 1000 lines of data. Each line of data consisted of a single value. The value was a double. If you break the file up into parts, what do you expect to be the maximum number of lines in each part?
In your first code, the file "datavecB.dat" contained a title line and 10 data lines. The ten data lines contained four double values, preceded by a space and separated by two tabs each. If you break the file up into parts, what do you expect to be the maximum number of lines in each part?
In your next-to-the-last code you appeared to be creating a "datavecC.dat" file that contained the single number that was the sum of 10,000 other values. If this file is to be included, what do you expect the maximum number of lines in this file to be?

Regarding your last post, did you even bother to read the code I posted?

// calculate the current dvaIndex from the loop index
	// change this formula as required to determine the maximum number of lines per file
	// or based on the number of parts you want.
	dvaIndex = 1 + i / 200;

The comments specifically state that if you want a different number of lines per file, or if you want to split the file up into equal sized chunks, to modify the formula.

With the current formula and an N of 1000, you would get 5 files. The first 200 lines in file 1 (i=0 to i=199), 200 lines in file 2 (i=200 to i=399), 200 lines in file 3 (i=400 to i=599), 200 lines in file 4 (i=600 to i=799) and 200 lines in file 5 (i=800 to i=999).

If that isn't what you wanted to do, explain how what you want is different from that.
(But answer the questions too, or I will not reply)

idgeza 0 Newbie Poster · Answer 10 · 2009-12-15T14:53:57+00:00

Your current code has N (the limit for the big loop) set at 1000, what do you expect the maximum value to be?

Let's say int N = 10000

Your current code has hbins (the number of bins in the histogram) set at 10, what do you expect the maximum value to be?

int hbins = 200;

In your first code, the file "datavecA.dat" contained 1000 lines of data. Each line of data consisted of a single value. The value was a double. If you break the file up into parts, what do you expect to be the maximum number of lines in each part?

If N = 10000, and I'd like to have 10 different files, then the maximum number of lines in each part equals 10000/10 = 1000.

In your first code, the file "datavecB.dat" contained a title line and 10 data lines. The ten data lines contained four double values, preceded by a space and separated by two tabs each. If you break the file up into parts, what do you expect to be the maximum number of lines in each part?

In each case, let the maximum number of lines equal 200, i.e. the number of hbins.

In your next-to-the-last code you appeared to be creating a "datavecC.dat" file that contained the single number that was the sum of 10,000 other values. If this file is to be included, what do you expect the maximum number of lines in this file to be?

sum is just a 3x3 matrix, so this is a single matrix in all cases.

The comments specifically state that if you want a different number of lines per file, or if you want to split the file up into equal sized chunks, to modify the formula.

Ok, I see your point, but could you give me a formula that produces .dat files with different numbers of lines? I have not been able to figure it out so far...

With the current formula and an N of 1000, you would get 5 files. The first 200 lines in file 1 (i=0 to i=199), 200 lines in file 2 (i=200 to i=399), 200 lines in file 3 (i=400 to i=599), 200 lines in file 4 (i=600 to i=799) and 200 lines in file 5 (i=800 to i=999).

I would like to have 10 files in the case of N = 10000 according to another formula, which does the following modified method:
the first 1000 lines in file 1 (i=0 to i=999), 2000 lines in file 2 (i=0 to 1999), 3000 lines in file 3 (i=0 to 2999), 4000 lines in file 4 (i=0 to 3999),...etc.

In all cases, datavec_B.dat and datavec_C.dat contain exactly 200 lines and a single 3x3 matrix, respectively.
But naturally I'll have 10 datavec_B.dat and 10 datavec_C.dat files, according to the method I mentioned above.

Murtan 317 Practically a Master Poster · Answer 11 · 2009-12-15T16:14:58+00:00

Ok, first, based on your answers to the questions, the file sizes represented do NOT warrant the work and/or effort to split the files up. In fact, if you're going to use this data for anything useful on any kind of recurring basis, I suspect you'll regret splitting the files.

I'll still help you split the output if you're sure you want to, but it doesn't make any sense to me.

Your answers to the questions were all fine until you got to the last one. Your answer to it is inconsistent with your previous answers.

The first set of answers seems to indicate a desire to split the data into equal-size files. The last answer seems to indicate that you want the data from the first record (line 0) in all ten output files.

Which is the way you really want it to work? (You pretty much need to pick one unless you're planning to give the user a way to make a choice.)

In the discussion of hbins, you seem to set a maximum at 200 bins (which would be 201 lines with the header line), but you also seem to still indicate that there would be more than one file.

Do you plan to repeat the data in all of the files, or would the files contain only the binning data for the entries in the matching data file? (i.e. dataVec_B1 would contain binning data for ONLY the data in dataVec_A1; while dataVec_B2 would contain the binning data for ONLY the data in dataVecA2)

As far as the formula for file parts (which in my sample was dvaIndex = 1 + i / 200 ) I could easily generate additional formulas, but it should also be trivial for you. Please make an effort to generate at least the following formulas. The practice will be good for you, I'd expect you'll be doing something like this again. (If you'll try to write the formulas I'll review them and help you get them right.)

The given formula will break the file up into 200 line pieces.

What would the formula be to break the file up into 500 line pieces?

What would the formula be to break the file up into 1000 line pieces?

What would the formula be to break the file into quarters (1/4 of the data in each file)?
(You can use N, the total number in the formula)

What would the formula be to break the file into tenths (1/10 of the data in each file)?

idgeza 0 Newbie Poster · Answer 12 · 2009-12-15T16:59:29+00:00

Let me clarify the method:

1.) I originally have a for loop, in which I generate N data items, i.e. N vectors vec (vec[i], where i goes from 0 to N-1), which are just vectors of dimension 3.

2.) Then I take some function of vec, and get cosangle (cosangle = vec_cosangle(vec0,vec)), which is just a float number.

3.) From the given cosangle data I make a histogram having constant bin size, e.g. hbins=200.

4.) From the given vec data I make a matrix of 3x3 with the code m.init(vec).

So far I have three different data sets, according to the above, which I can print out to three different files, let us say datavecA.dat, datavecB.dat and datavecC.dat. (Please, see my whole code somewhere in my post.)

Now i would like something different, but not totally different.

I'd just like to print out the generated data periodically to see what values they really have. So once I start generating the data, for example after the i=1000 iteration, I want to print out the data I have up to then, i.e. -> print out datavecA_1.dat (cosangle, containing 1000 lines), -> datavecB_1.dat (histogram according to cosangle, containing 200 lines) and finally -> datavecC_1.dat (the matrix from vec, containing a single matrix of 3x3).

I am thinking of something like this part (I hope I am not ambiguous...)

ofstream f1;
				f1.open("vecdata_1.dat");

		       	 	ofstream f2;
				f2.open("vecdata_2.dat");


	for(int i=0; i<N; ++i)
	{

			// ... there are some transformation stuff previously

			cosangle[i] = vec_cosangle(vec0,vec);

			if ( i >=0 && i<100 ){

				f1 << cosangle[i] << endl;

			}
			else if ( i >=0 && i<200 ){

				f2 << cosangle[i] << endl;

			}
			else { 
			cout << "There is something wrong..." << endl;
			}

		
	}  
f1.close();
f2.close();

idgeza 0 Newbie Poster · Answer 13 · 2009-12-15T18:58:24+00:00

I rewrote my code according to your suggestions, and here it is (is it right?)

But I get only datavecA_1, datavec_A2,... in this way. How can I get datavecB_1, datavecB_2,... and datavecC_1, datavecC_2,... ?

for (i = 0; i < N; i++) 
	{      
	
	dvaIndex = 1 + i / 200;
	if (dvaIndex != dvaIndexLast){

		cosangle[i] = vec_angle(vec0,vec);

		ofDva.close();

		ostringstream tfn;
		tfn << "datavec_A" << dvaIndex << ".dat";
		ofDva.open(tfn.str().c_str());	

	}
	dvaIndexLast = dvaIndex;	

	  	ofDva << cosangle[i] << endl;

		int bin=(int) ((cosangle[i]-hmin)/(hmax-hmin)*hbins);
		if ( bin<0 ) bin=0;
		if ( bin>hbins-1 ) bin=hbins-1;
		h[bin]++;

		//code here to make a matrix from vec[i]
	        m[i].init(vec);
    	 }  // this is the end of the big loop through the N vectors

	ofDva.close();

		// Write the histogram into file:
		ofstream fileB;
		file.open("datavecB.dat");
		// Write a legend line:
		fileB << "#position\tvalue\t\thalf-binwidth\terror" << endl;
		for ( int bin=0 ; bin<hbins ; bin++ )
		{
		// Position of histogram bin:
		double X=hmin+(bin+0.5)*(hmax-hmin)/hbins;
		// Half-binwidth of histogram bin:
		double dX=0.5*(hmax-hmin)/hbins;
		// Entries in histogram bin:
		double Y=h[bin];
		// Error of entries in histogram bin:
		double dY=sqrt(h[bin]);
		// Write the bin into file:
		fileB << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
		}
		fileB.close();

		delete[] h;


    	// will hold the sum of all of the matrices
    	// code to initialize the sum matrix to all zeros
    	Matrix3x3 sum = m[0]; 

	ofstream fileC;
	fileC.open("datavecC.dat")
    	for (i=0;i<N;i++) {
        // use the '+= operator to add each m[i] to sum
	sum += m[i];
    	}
        // use the /= operator to divide the sum matrix by scalar N
	sum /= N;
	fileC << sum << endl;
	fileC.close();

Murtan 317 Practically a Master Poster · Answer 14 · 2009-12-15T21:41:54+00:00

At first glance, I thought the first code you posted showed the data going to two files at once (if i < 100), but then I noticed the else (on line 6 of the code below). As written, the first 100 lines would go in file 1, the next 100 lines in file 2.

if ( i >=0 && i<100 ){

				f1 << cosangle[i] << endl;

			}
			else if ( i >=0 && i<200 ){

				f2 << cosangle[i] << endl;

			}
			else { 
			cout << "There is something wrong..." << endl;
			}

The key limitation with the above method is that all of the files have to be open at once and open files is a potentially limited resource. (If you wanted 3 data files x 10 sets, you have to have 30 files open before you start.)

Regarding the last code you posted, that is how I first anticipated the file output looking. The following discussion gets into how I would go about having separate datavec_B files as well.

Regarding the datavec_B files. If you want one for each datavec_A file that only has the data for that file, you would add the output and reset for the histogram data before opening the next datavec_A file.

if (dvaIndex != dvaIndexLast)
	{
		// Output anything necessary before we close the file
		// --- fill in anything you think is appropriate

		// close the file we're done with this part
		ofDva.close();

		// *** You can output and reset the histogram here ***
		ostringstream tfnB;
		tfnB << "datavec_B" << dvaIndexLast << ".dat";
		ofDvb.open(tfnB.str().c_str());
		ofDvb << "#position\tvalue\t\thalf-binwidth\terror" << endl;
		for ( int bin = 0 ; bin < hbins ; bin++ )
		{
			// Position of histogram bin:
			double X = hmin + (bin + 0.5) * (hmax - hmin) / hbins;
			// Half-binwidth of histogram bin:
			double dX=0.5*(hmax-hmin)/hbins;
			// Entries in histogram bin:
			double Y=h[bin];
			// Error of entries in histogram bin:
			double dY=sqrt(h[bin]);
			// Write the bin into file:
			ofDvb << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
			// *** If you want to reset the count do it here ***
			h[bin] = 0;
		}
		ofDvb.close();

		// *** If you want to have seperate datavec_C files, add the output here ***

		// generate the new filename
		ostringstream tfn;
		tfn << "datavec_A" << dvaIndex << ".dat";
		// open the file
		ofDva.open(tfn.str().c_str());
	}
	dvaIndexLast = dvaIndex;

If you wanted separate datavec_C files as well, add it between the close of ofDvb (note that I didn't declare it, you'll have to add it) and where the new name for ofDva is generated.

Note also that because we close ofDva first, we could technically have reused it. I was torn at the time, but decided that I would rather have a stream for each output file to remove any question as to whether data will end up in the right file or not.

idgeza 0 Newbie Poster · Answer 15 · 2009-12-15T22:06:19+00:00

I am trying to do this, but I can't. Could you explicitly write down what you are thinking about...?

What about this part

int bin=(int) ((cosangle[i]-hmin)/(hmax-hmin)*hbins);
		if ( bin<0 ) bin=0;
		if ( bin>hbins-1 ) bin=hbins-1;
		h[bin]++;
/************************************/
		//code here to make a matrix from vec[i]
	        m[i].init(vec);
                Matrix3x3 sum = m[0]; 
                sum += m[i];

I am here right now:

// We need to declare the output stream we will write from inside the loop and open the file for output
	ofstream ofDva("datavec_A1.dat");
	// Two integers for the current datavecA index and the last datavecA index
	int dvaIndex = 1;
	int dvaIndexLast = dvaIndex;

	for (i = 0; i < N; i++) 
	{      
	
	cosangle[i] = vec_angle(vec0,vec);

	dvaIndex = 1 + i / 200;

	if (dvaIndex != dvaIndexLast){

		ofDva << cosangle[i] << endl;

		ofDva.close();

		ostringstream tfnB;
		tfnB << "datavec_B" << dvaIndexLast << ".dat";
		ofDvb.open(tfnB.str().c_str());
		ofDvb << "#position\tvalue\t\thalf-binwidth\terror" << endl;

		for ( int bin = 0 ; bin < hbins ; bin++ )
		{
		// Position of histogram bin:
		double X = hmin + (bin + 0.5) * (hmax - hmin) / hbins;
		// Half-binwidth of histogram bin:
		double dX=0.5*(hmax-hmin)/hbins;
		// Entries in histogram bin:
		double Y=h[bin];
		// Error of entries in histogram bin:
		double dY=sqrt(h[bin]);
		// Write the bin into file:
		ofDvb << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
		h[bin] = 0;

		}
 		ofDvb.close();

		ostringstream tfn;
		tfn << "datavec_A" << dvaIndex << ".dat";
		ofDva.open(tfn.str().c_str());
	}
	dvaIndexLast = dvaIndex;	

		int bin=(int) ((cosangle[i]-hmin)/(hmax-hmin)*hbins);
		if ( bin<0 ) bin=0;
		if ( bin>hbins-1 ) bin=hbins-1;
		h[bin]++;

		//code here to make a matrix from vec[i]
	        m[i].init(vec);
    	 }  // this is the end of the big loop through the N vectors

		delete[] h;


    	// will hold the sum of all of the matrices
    	// code to initialize the sum matrix to all zeros
    	Matrix3x3 sum = m[0]; 

	ofstream fileC;
	fileC.open("datavecC.dat")
    	for (i=0;i<N;i++) {
        // use the '+= operator to add each m[i] to sum
	sum += m[i];
    	}
        // use the /= operator to divide the sum matrix by scalar N
	sum /= N;
	fileC << sum << endl;
	fileC.close();
return 0;
}

Murtan 317 Practically a Master Poster · Answer 16 · 2009-12-16T01:27:48+00:00

Not sure what you're asking about the first code block.

In the second code block, lines 10 and 16 (where you set and output cosangle) need to be past line 46 (where dvaIndexLast = dvaIndex)

And after the 'big loop' ends, we will need to output the histogram data again (datavec_B files), before the data is deleted on line 57.
It might make sense to write a function to output the histogram data to a stream. It might also make sense to have that function (or another that uses it) generate the filename and open the stream as well. Then we could call that function from the two places we write the file.

Did you want multiple datavec_C files? Explain the difference in content between datavec_C1 and datavec_C2

idgeza 0 Newbie Poster · Answer 17 · 2009-12-16T14:27:53+00:00

Did you want multiple datavec_C files? Explain the difference in content between datavec_C1 and datavec_C2

Yes. The difference between the datavec_C1 and datavec_C2 files is obvious: I generate them from different data sets.

Please, correct my code, because I don't see the errors...I got into a muddle :(

Murtan 317 Practically a Master Poster · Answer 18 · 2009-12-16T14:45:42+00:00

It has NEVER been my intent to write the code for you, and if I did, it would all be to one file as I discussed previously. If you want the output in multiple files, you'll have to do the work.

Regarding the datavec_C files, I was hoping for a clearer explanation of where the data sets come from...if they are the summary of the matching datavec_A files, then they will be written in the same area of code where the datavec_B files are.

idgeza 0 Newbie Poster · Answer 19 · 2009-12-16T17:10:31+00:00

What's wrong with this version of code? I have no idea... Please help me to solve this!

// We need to declare the output stream we will write from inside the loop and open the file for output
	ofstream ofDva("datavec_A1.dat");
	ofstream ofDvb("datavec_B1.dat");
	ofstream ofDvc("datavec_C1.dat");
	// Two integers for the current datavecA index and the last datavecA index
	int dvaIndex = 1;
	int dvaIndexLast = dvaIndex;

	for (i = 0; i < N; i++) 
	{      
	
	dvaIndex = 1 + i / 200;

	if (dvaIndex != dvaIndexLast){

		cosangle[i] = vec_angle(vec0,vec);

		ofDva.close();		

		int bin=(int) ((cosangle[i]-hmin)/(hmax-hmin)*hbins);
		if ( bin<0 ) bin=0;
		if ( bin>hbins-1 ) bin=hbins-1;
		h[bin]++;

		ofDvb.close();

	        m[i].init(vec);

		ofDvc.close();

		ostringstream tfn;
		tfn << "datavec_A" << dvaIndex << ".dat";
		ofDva.open(tfn.str().c_str());
		
		ostringstream tfnB;
		tfnB << "datavec_B" << dvaIndexLast << ".dat";
		ofDvb.open(tfnB.str().c_str());
		ofDvb << "#position\tvalue\t\thalf-binwidth\terror" << endl;

		for ( int bin = 0 ; bin < hbins ; bin++ )
		{
		// Position of histogram bin:
		double X = hmin + (bin + 0.5) * (hmax - hmin) / hbins;
		// Half-binwidth of histogram bin:
		double dX=0.5*(hmax-hmin)/hbins;
		// Entries in histogram bin:
		double Y=h[bin];
		// Error of entries in histogram bin:
		double dY=sqrt(h[bin]);
		// Write the bin into file:
		ofDvb << " " << X << "\t\t" << Y << "\t\t" << dX << "\t\t" << dY << endl;
		h[bin] = 0;

		}
 		ofDvb.close();


	}
	dvaIndexLast = dvaIndex;	
	
	ofDva << cosangle[i] << endl;


    	 }  // this is the end of the big loop through the N vectors
	
	ofDva.close();

	Matrix3x3 sum = m[0]; 
	sum +=m[i];
	sum /= N;
	ofDvc << sum << endl;

	ofDvc.close();
	
	delete[] h;

These are my data files in a command line after ls -lrt:

-rw-r--r-- 1 idg idg        0 2009-12-16 12:04 datavec_C1.dat
-rw-r--r-- 1 idg idg      870 2009-12-16 12:04 datavec_A1.dat
-rw-r--r-- 1 idg idg     4137 2009-12-16 12:04 datavec_B1.dat
-rw-r--r-- 1 idg idg     1977 2009-12-16 12:04 datavec_A2.dat

Murtan 317 Practically a Master Poster · Answer 20 · 2009-12-16T19:38:38+00:00

@idgeza
What is it doing differently from what you want or expect?

If it is missing the B2 file, you need to add a write to the last B2 file after the 'big' loop completes. The write will be almost identical to the one inside the big loop. I would place it after you close ofDva.

@jawadmardan
If you have a question and want an answer, put it in its own thread; don't post it in a thread unrelated to your question.

data printout (continually) into many files

Recommended Answers Collapse Answers

All 25 Replies

Recommended Answers