denizen08 0 Newbie Poster

I have a project that requires us to perform matrix multiplication via multi-threading, using the Windows API.

I've got the basic code running and everything is well under sane/ideal conditions. But if my input matrices go beyond 10x10 the program ends up failing on WaitOnMultipleObjects(...) and runs the main thread through without waiting for the rest to complete computation.

My solution computes for each element of the resultant matrix. Below is the code:

Attached are the rest of my source files in case you want to compile it.

#include "stdafx.h"
#include <cstdlib>
#include <iostream>
#include <Windows.h>
#include <process.h>
#include <strsafe.h>
#include "random.h"

using namespace std;

class Matrix {
private:
  double **_matrixValues;
  unsigned int _rowSize, _columnSize;

public:
  Matrix() {
    _rowSize = 0;
    _columnSize = 0;
    _matrixValues = 0;
  }
  Matrix(const unsigned int row_size, const unsigned int column_size) {
    _rowSize = row_size;
    _columnSize = column_size;
    _matrixValues = (double**)malloc(sizeof(double*)*_rowSize);
    for(unsigned int i =0 ; i <_rowSize;i++){
      *(_matrixValues+i) = (double*)malloc(sizeof(double)*_columnSize);
      for(unsigned int j = 0; j < _columnSize; ++j) {
        _matrixValues[i][j] = 0;
      }
    }
  }
  /*~Matrix() {
    for(int i = 0; i < _rowSize; i++)
		  free(_matrixValues[i]);
    free(_matrixValues);
    _rowSize = 0;
    _columnSize = 0;
  }*/
  double getValue_at (const unsigned int row_index,
    const unsigned int column_index){
    if(row_index >= _rowSize || column_index >= _columnSize)
      cerr << "[GETTER]Access Violation: Out of Bounds";
    else
      return _matrixValues[row_index][column_index];
  }
  void setValue_at (double value, const unsigned int row_index,
    const unsigned int column_index) {
    if((row_index >= _rowSize) || (column_index >= _columnSize))
       cerr << "[SETTER]Access Violation: Out of Bounds";
    else
      _matrixValues[row_index][column_index] = value;
  }
  const unsigned int ColumnSize() { return _columnSize; }
  const unsigned int RowSize() { return _rowSize; }
};

typedef struct ThreadedMatrixData {
  Matrix *A, *B, *resultantMatrix;
  unsigned int row_position, column_position;
  DWORD theadId;
} ThreadedMatrixComputationData, *ThreadedMatrixComputationDataPtr;


#define MAX_THREADS 3
#define BUF_SIZE 1024

DWORD WINAPI ThreadedMultiplier( LPVOID lpParam );
void ErrorHandler(LPTSTR lpszFunction);

void print_usage();

void randomMatrixFill(Matrix &matrix);
Matrix ThreadedMatrixMultiplier(Matrix &leftHandMatrix, Matrix &rightHandMatrix);

int main(int argc, char* argv[])
{
	const int a_row = 10, a_column = 4;
	const int b_row = 4, b_column = 10;
	Matrix A, B;

  A = Matrix(a_row,a_column);
  B = Matrix(b_row,b_column);
  printf("Generating randomized matrices:\n");
  randomMatrixFill(A);
  randomMatrixFill(B);

  printf("\tA:\n");
  for(int i = 0; i < a_row; ++i) {
    printf("\t");
    for(int j = 0; j < a_column; ++j) {
      printf("%f ", A.getValue_at(i,j));
    }
    printf("\n");
  }
  printf("\tB:\n");
  for(int i = 0; i < b_row; ++i) {
    printf("\t");
    for(int j = 0; j < b_column; ++j) {
      printf("%f ", B.getValue_at(i,j));
    }
    printf("\n");
  }

  printf("Performing threded Matrix Multiplication...\n");
  Matrix C = ThreadedMatrixMultiplier(A,B);
  printf("\nResultant Matrix C:\n");
  
  for(unsigned int i = 0; i < C.RowSize(); ++i) {
    printf("\t");
    for(unsigned int j = 0; j < C.ColumnSize(); ++j) {
      printf("%f ", C.getValue_at(i,j));
    }
    printf("\n");
  }

  system("PAUSE");
	return 0;
}


void randomMatrixFill(Matrix &matrix){
  RandNumGen X (-10.0, 10.0); // random coefs in range (-10,10)
  for (unsigned int i=0; i < matrix.RowSize(); i++) {
    for (unsigned int j=0; j < matrix.ColumnSize(); j++) {
      matrix.setValue_at(X.generate(),i,j);
    }
  }
}


Matrix ThreadedMatrixMultiplier(Matrix &leftHandMatrix, Matrix &rightHandMatrix) {
   //prepare new resultant matrix parameters
  unsigned int returnValue_rowSize, returnValue_columnSize;
  if(leftHandMatrix.ColumnSize() > rightHandMatrix.ColumnSize())
    returnValue_columnSize = leftHandMatrix.ColumnSize();
  else
    returnValue_columnSize = rightHandMatrix.ColumnSize();
  if(leftHandMatrix.RowSize() > rightHandMatrix.RowSize())
    returnValue_rowSize = leftHandMatrix.RowSize();
  else
    returnValue_rowSize = rightHandMatrix.RowSize();
  Matrix returnValue = Matrix(returnValue_rowSize, returnValue_columnSize);

  const unsigned int totalThreads = returnValue_rowSize*returnValue_columnSize;

  //prepare data and handlers for computation
  ThreadedMatrixComputationDataPtr *threadedMatrixComputationData;
  threadedMatrixComputationData = new ThreadedMatrixComputationDataPtr[totalThreads];
  DWORD *dwThreadIdArray;
  dwThreadIdArray = new DWORD[totalThreads];
  HANDLE *hThreadArray;
  hThreadArray = new HANDLE[totalThreads];
  HANDLE *hThreadHandleAssertie;  
  hThreadHandleAssertie = new HANDLE[totalThreads];
  
  printf("Creating %d threads to handle a %dx%d Matrix",
    totalThreads, returnValue_rowSize,returnValue_columnSize);

  // Allocate memory for thread data.
  for(unsigned int i = 0; i < totalThreads; i++) {
    threadedMatrixComputationData[i] = (ThreadedMatrixComputationDataPtr) HeapAlloc(
      GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(ThreadedMatrixComputationData) );
    
    if( threadedMatrixComputationData[i] == NULL ) {
        // If the array allocation fails, the system is out of memory
        // so there is no point in trying to print an error message.
        // Just terminate execution.
        ExitProcess(2);
    }

    // Provide references to data for each thread to work with.
    threadedMatrixComputationData[i]->A = &leftHandMatrix;
    threadedMatrixComputationData[i]->B = &rightHandMatrix;
    threadedMatrixComputationData[i]->resultantMatrix = &returnValue;
    threadedMatrixComputationData[i]->column_position = i%returnValue_columnSize;
    threadedMatrixComputationData[i]->row_position = i/returnValue_columnSize;
    // Time to actually create the thread to begin execution on its own, bitch.

    hThreadArray[i] = CreateThread( 
        NULL,                   // default security attributes
        0,                      // use default stack size  
        ThreadedMultiplier,       // thread function name
        threadedMatrixComputationData[i],          // argument to thread function 
        0,                      // use default creation flags | run immediately
        &dwThreadIdArray[i]);   // returns the thread identifier

    threadedMatrixComputationData[i]->theadId = dwThreadIdArray[i];
    
    // Check the return value for success.
    // If CreateThread fails, terminate execution. 
    // This will automatically clean up threads and memory. 

    if (hThreadArray[i] == NULL) {
        ErrorHandler(TEXT("CreateThread"));
        ExitProcess(3);
    } else {
      printf("\t->Launched Thread# %d, ID:%d\n", i, dwThreadIdArray[i]);
      //WaitForSingleObject(hThreadArray[i], INFINITE); //waiting for each thread would solve the problem, but defeat the purpose of using multiple threads. A simple, but obviously wrong work-around.
    }
  } // End of multiplier thread creation loop.
  
  DWORD res = WaitForMultipleObjects(totalThreads, hThreadArray, TRUE, INFINITE);
  
  // Close all thread handles and free memory allocations.
  // House cleaning after the thread parties! FTW.
  //for(unsigned int i=0; i<totalThreads; i++) {
  //    CloseHandle(hThreadArray[i]);
  //    if(threadedMatrixComputationData[i] != NULL) {
  //        HeapFree(GetProcessHeap(), 0, threadedMatrixComputationData[i]);
  //        threadedMatrixComputationData[i] = NULL;    // Ensure address is not reused.
  //    }
  //}
  return returnValue;
}

DWORD WINAPI ThreadedMultiplier( LPVOID lpParam ) { 
  HANDLE hStdout;
  ThreadedMatrixComputationDataPtr threadedMatrixComputationDataInstance;

  TCHAR msgBuf[BUF_SIZE];
  size_t cchStringSize;
  DWORD dwChars;

  // Make sure there is a console to receive output results. 

  hStdout = GetStdHandle(STD_OUTPUT_HANDLE);
  if( hStdout == INVALID_HANDLE_VALUE )
      return 1;

  // Cast the parameter to the correct data type.
  // The pointer is known to be valid because 
  // it was checked for NULL before the thread was created.
 
  threadedMatrixComputationDataInstance = (ThreadedMatrixComputationDataPtr)lpParam;
  unsigned int leftHandSideOperandBound = threadedMatrixComputationDataInstance->A->ColumnSize();
  unsigned int rightHandSideOperandBound = threadedMatrixComputationDataInstance->B->RowSize();
	double product = 0;
  for (unsigned int i=0; i<leftHandSideOperandBound; i++) {
    // leftHandSideMatrix only requires access to a single row span
    for(unsigned int j=0; j<rightHandSideOperandBound; j++) {
    // rightHandSideMatrix only requires access to a single column span
      product +=
        threadedMatrixComputationDataInstance
        ->A->getValue_at(threadedMatrixComputationDataInstance->row_position,i) * 
        threadedMatrixComputationDataInstance
        ->B->getValue_at(j,threadedMatrixComputationDataInstance->column_position);
    }
  }
	threadedMatrixComputationDataInstance->resultantMatrix->setValue_at(product,
    threadedMatrixComputationDataInstance->row_position,
    threadedMatrixComputationDataInstance->column_position);

  // Print the parameter values using thread-safe functions.
  StringCchPrintf(msgBuf, BUF_SIZE, TEXT("[%d]Finished computation at new matrix position: A(%d,-)xB(-,%d) -> C(%d, %d)\n"), 
    threadedMatrixComputationDataInstance->theadId,
    threadedMatrixComputationDataInstance->row_position,
    threadedMatrixComputationDataInstance->column_position,
    threadedMatrixComputationDataInstance->row_position,
    threadedMatrixComputationDataInstance->column_position); 
  StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
  WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars, NULL);

  return 0;
} 

void ErrorHandler(LPTSTR lpszFunction) 
{ 
    // Retrieve the system error message for the last-error code.

    LPVOID lpMsgBuf;
    LPVOID lpDisplayBuf;
    DWORD dw = GetLastError(); 

    FormatMessage(
        FORMAT_MESSAGE_ALLOCATE_BUFFER | 
        FORMAT_MESSAGE_FROM_SYSTEM |
        FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL,
        dw,
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        (LPTSTR) &lpMsgBuf,
        0, NULL );

    // Display the error message.

    lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, 
        (lstrlen((LPCTSTR) lpMsgBuf) + lstrlen((LPCTSTR) lpszFunction) + 40) * sizeof(TCHAR)); 
    StringCchPrintf((LPTSTR)lpDisplayBuf, 
        LocalSize(lpDisplayBuf) / sizeof(TCHAR),
        TEXT("%s failed with error %d: %s"), 
        lpszFunction, dw, lpMsgBuf); 
    MessageBox(NULL, (LPCTSTR) lpDisplayBuf, TEXT("Error"), MB_OK); 

    // Free error-handling buffer allocations.

    LocalFree(lpMsgBuf);
    LocalFree(lpDisplayBuf);
}
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.