kenny1989 0 Junior Poster in Training

Hi, I'm having trouble with getting my cuda program to work, apparently it is something to do with the way I call the kernel and what I'm telling it to do compare to what I want it to do.

OK, what I want it to do is (for now):

*input x*
*maths calculation using x*
*set answer to maths calculation as x*
*do this 100 times*
*output x*

Here is my code:

#include <string>
#include <cstdlib>
#include <iostream>
#include <cmath>


__global__ void runge_4(float *x){
*x = *x + 0.01;

    }

using namespace std;

float x = 1;
float *gpu;


LARGE_INTEGER numTicksPerSecond;
LARGE_INTEGER startTime;
LARGE_INTEGER endTime; 

int main(){
	
	QueryPerformanceFrequency(&numTicksPerSecond);
    	QueryPerformanceCounter(&startTime);

	cudaMalloc((void**)&gpu, sizeof(float));	
    
     	cudaMemcpy(gpu,&x,sizeof(float),cudaMemcpyHostToDevice);
     	runge_4<<<1, 100>>>(gpu);
    	cudaMemcpy(&x, gpu,sizeof(float),cudaMemcpyDeviceToHost);
    
    	QueryPerformanceCounter(&endTime);
 
	LONGLONG numTicks = endTime.QuadPart - startTime.QuadPart;

	double numSeconds = (((double) numTicks) / (double) numTicksPerSecond.QuadPart);

	cout << "Num Ticks Per Second : " << numTicksPerSecond.QuadPart << endl;

	cout << "Start " << startTime.QuadPart << endl;

	cout << "End : " << endTime.QuadPart << endl;

	cout << "Num Ticks : " << numTicks << endl;

	cout << "Num seconds : " << numSeconds << endl;
	
	
	cout<<x<<endl;

cudaFree(gpu);


return 0;
}

The output from this code is 1.01, athough I tell the kernel to run 100 times, and changing the numbers in line 30 produce some weird results:

e.g.

runge_4<<<100, 100>>>(gpu); gives an output of 1.07
runge_4<<<1, 1000>>>(gpu); gives an output of 1