Right now, I am allocating a dynamic 2D array for my rasterizer's edge list. My setup looks like this:

// One polygon edge as tracked by the rasterizer's edge list.
struct EDGE
{
	float x;           // x position of the edge (presumably on the current scanline; confirm)
	float dx;          // increment applied to x (presumably per scanline; confirm against the stepping code)
	float uvws[4];     // interpolants carried along the edge: u, v, 1/z, shade
	float duvws[4];    // deltas for uvws, in the same component order
	float y_limit[2];  // presumably the edge's vertical extent (start/end y) -- TODO confirm
	bool active;       // NOTE(review): looks like an "edge in use" flag -- confirm
};
// Edge bookkeeping for one face: the allocation code in this post creates
// one EDGERECORD per face of an object.
struct EDGERECORD
{
	EDGE *left_edge;   // presumably the entry of edges[] currently bounding the span on the left -- confirm
	EDGE *right_edge;  // presumably the matching right-hand edge -- confirm
	EDGE edges[9];     // inline storage for up to 9 edges
	UINT16 num_edges;  // presumably the number of edges[] entries in use -- confirm
};

// On project initialization: allocate a zero-filled table with one EDGERECORD* slot per object.
// NOTE(review): the calloc result is not checked for NULL in this excerpt.
GET = (EDGERECORD**)calloc(number_of_objects, sizeof(EDGERECORD*));
// For each new object added: allocate one EDGERECORD per face of that object.
// NOTE(review): plain new[] only promises the allocator's default alignment, which may be
// less than the 16 bytes that aligned SSE loads/stores on the EDGE arrays require.
GET[object_index] = new EDGERECORD[object[object_index].number_of_faces];

I need this whole structure set up so that I can use SSE instructions on these two members:

float uvws[4]; // u, v, 1/z, shade
float duvws[4]; // delta u, delta v, etc.

That will allow me to do all four increments at the same time.

I have gotten it to work with separate variables like this:

// Scratch buffers and __m128 views used to interpolate the uvwsh values
// between the left and right edge.
// NOTE(review): this snippet reads a member named uvwsh, while the EDGE struct
// shown at the top of the post names it uvws -- one of the two was renamed.
float *huvwsh;
	float *hduvwsh;
	float *pdx;
	__m128* m128_base;
	__m128* m128_delta;
	__m128* m128_dx;

	// Three separate 16-byte blocks (4 floats each), each requested with 16-byte alignment.
	// NOTE(review): the results are not checked for NULL, and no matching
	// _aligned_free appears in this excerpt.
	huvwsh = (float*)_aligned_malloc(4 * sizeof(float), 16);
	hduvwsh = (float*)_aligned_malloc(4 * sizeof(float), 16);
	pdx = (float*)_aligned_malloc(4 * sizeof(float), 16);

	// View each scratch buffer as a single SSE vector.
	m128_base = (__m128*)huvwsh;
	m128_delta = (__m128*)hduvwsh;
	m128_dx = (__m128*)pdx;

	// base = left-edge values. _mm_set_ps fills lanes from the highest down, so
	// uvwsh[0] lands in lane 3 and uvwsh[3] in lane 0 (reversed relative to the
	// array's order in memory). Both vectors are built the same way, so the
	// lane-wise arithmetic below stays consistent.
	*m128_base = _mm_set_ps(l_edge->uvwsh[0], l_edge->uvwsh[1], l_edge->uvwsh[2], l_edge->uvwsh[3]);
	// delta starts out as the right-edge values ...
	*m128_delta = _mm_set_ps(r_edge->uvwsh[0], r_edge->uvwsh[1], r_edge->uvwsh[2], r_edge->uvwsh[3]);
	// ... and becomes (right - left) per component.
	*m128_delta = _mm_sub_ps(*m128_delta, *m128_base);
	// Broadcast the horizontal distance between the two edges into all four lanes.
	*m128_dx = _mm_set_ps1(r_edge->x - l_edge->x);
	// delta = (right - left) / (right.x - left.x): the change per unit of x.
	// NOTE(review): when r_edge->x == l_edge->x this divides by zero.
	*m128_delta = _mm_div_ps(*m128_delta, *m128_dx);
	// ... (remaining work elided in the original post) ...
	// Advance all four interpolants by one step with a single add.
	*m128_base = _mm_add_ps(*m128_base, *m128_delta);

However, I need to know how to allocate everything from the first example with 16-byte alignment, so that I can skip the step of copying everything out to a separate array before working on it.

Thanks to anyone that can help!

OK, got it.
The solution was:

// Edge layout that lets the interpolants be used in place as SSE vectors.
// __declspec(align(16)) is the MSVC-specific way to request 16-byte alignment.
__declspec(align(16)) struct EDGE
{
	float uvwsh[4]; // u, v, 1/z, shade -- first member, so it sits at the struct's aligned start
	float duvwsh[4]; // deltas for uvwsh -- starts 16 bytes in, so it is 16-byte aligned as well
	__m128 *m_uvwsh; // presumably set to point at uvwsh above; the assignment is not shown -- confirm
	__m128 *m_duvwsh; // presumably set to point at duvwsh above -- confirm
	// NOTE(review): if the two pointers above refer into this same EDGE, copying
	// the struct would leave the copy's pointers aimed at the source object.
	float x, dx;
	float y_limit[2];
	bool active;
};
// Per-face edge record, reordered so that the EDGE array is the first member.
struct EDGERECORD
{
	EDGE edges[9]; // first member, so edges[0] starts at the record's own base address
	EDGE *left_edge; // presumably the entry of edges[] bounding the span on the left -- confirm
	EDGE *right_edge; // presumably the matching right-hand edge -- confirm
	UINT16 num_edges; // presumably the number of edges[] entries in use -- confirm
};
// Allocate each object's edge records on a 16-byte boundary instead of using "new".
// NOTE(review): the result is not checked for NULL here. Memory obtained from
// _aligned_malloc must be released with _aligned_free, and no constructors run on it.
GET[object_index] = (EDGERECORD*)_aligned_malloc(object[object_index].number_of_faces * sizeof(EDGERECORD), 16);
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.