I have a multithreaded OpenCV program that uses 4 threads to do the following:
Thread 1->calls cvQueryFrame() which grabs the frame images from the camera one by one and stores them into a std::vector inputBuffer
Thread 2->performs thresholding on inputBuffer[0], copies result to another std::vector called filterOutputBuffer
Thread 3->performs optical flow algorithm / draws flow field for the first two elements in filterOutputBuffer, copies result to another std::vector called ofOutputBuffer
Thread 4->displays the image using cvShowImage(ofOutputBuffer[0])
So essentially I was envisioning each thread performing the task on the first element of the corresponding input vector/buffer and storing the result at the back of the corresponding output vector. Sort of like 3 factory workers doing their part on the assembly line, then throwing the end result into a bucket for the next guy.
I set up mutexes for all of the buffers and the program works, but the output is delayed several seconds from the live camera stream.
I ran a non-multithreaded version of the same program (that used one giant while(true) loop) and it ran in real-time with only the occasional stutter.
Why is my concurrent implementation delayed in performance so much?
Below are the thread functions:
void writeBuffer()
{
cout << "Thread " << GetCurrentThreadId() << ": Capturing frame from camera!" << endl;
CvCapture *capture = 0;
IplImage *frame = 0;
DWORD waitResult;
if (!(capture = cvCaptureFromCAM(0)))
cout << "Cannot initialize camera!" << endl;
//now start grabbing frames and storing into the vector inputBuffer
while (true)
{
//cout << "Thread " << GetCurrentThreadId() << ": Waiting for mutex to write to input buffer!..." << endl;
waitResult = WaitForSingleObject(hMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
frame = cvQueryFrame(capture); //store the image into frame
if(!frame)
{
cout << "Thread " << GetCurrentThreadId() << ": Error capturing frame from camera!" << endl;
}
//cout << "Thread " << GetCurrentThreadId() << ": Getting Frame..." << endl;
inputBuffer.push_back(*frame);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring mutex..." << endl;
}
if(!ReleaseMutex(hMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing mutex..." << endl;
}
//else cout << "Thread " << GetCurrentThreadId() << ": Done writing to input buffer, Mutex Released!" << endl;
//signal hDoneGettingFrame
PulseEvent(hDoneGettingFrame);
}
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
void opticalFlow()
{
...
DWORD waitResult;
//start grabbing frames from the vector inputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from input buffer..." << endl;
while(true)
{
waitResult = WaitForSingleObject(fMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//grab first two frames from buffer (inputBuffer[0-1]) and process them
if(filterOutputBuffer.size() > 1)
{
frame1 = filterOutputBuffer[0];
frame2 = filterOutputBuffer[1];
filterOutputBuffer.erase(filterOutputBuffer.begin());
}
else
{
if(!ReleaseMutex(fMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing filter mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Input Buffer empty!" << endl;
continue;
}
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring input mutex..." << endl;
continue;
}
if(!ReleaseMutex(fMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
}
...
//Do optical flow stuff
...
waitResult = WaitForSingleObject(oMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//cout << "Thread " << GetCurrentThreadId() << ": WRITING TO OUTPUT BUFFER..." << endl;
ofOutputBuffer.push_back(*frame1_3C);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring output mutex..." << endl;
}
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing output mutex..." << endl;
}
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
void filterImage()
{
DWORD waitResult;
...
//start grabbing frames from the vector inputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from input buffer..." << endl;
while(true)
{
waitResult = WaitForSingleObject(hMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//grab first frame and then release mutex
if(inputBuffer.size() > 0)
{
frame = inputBuffer[0];
inputBuffer.erase(inputBuffer.begin());
}
else
{
if(!ReleaseMutex(hMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Input Buffer empty!" << endl;
continue;
}
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring input mutex..." << endl;
continue;
}
if(!ReleaseMutex(hMutex))
{
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
}
...
//Tresholding Image Stuff
...
//cout << "Thread " << GetCurrentThreadId() << ": Waiting to write to output buffer..." << endl;
waitResult = WaitForSingleObject(fMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
//cout << "Thread " << GetCurrentThreadId() << ": WRITING TO OUTPUT BUFFER..." << endl;
filterOutputBuffer.push_back(*out);
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring filter mutex..." << endl;
}
if(!ReleaseMutex(fMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing filter mutex..." << endl;
}
}
void displayImage()
{
DWORD waitResult;
IplImage final;
int c;
cvNamedWindow("Image", CV_WINDOW_AUTOSIZE);
//start grabbing frames from the vector ouputBuffer
cout << "Thread " << GetCurrentThreadId() << ": Waiting to read from output buffer..." << endl;
while (true)
{
waitResult = WaitForSingleObject(oMutex, INFINITE);
switch(waitResult)
{
// The thread got ownership of the mutex
case WAIT_OBJECT_0:
if(ofOutputBuffer.size() > 0)
{
//cout << "Thread " << GetCurrentThreadId() << ": Reading output buffer..." << endl;
final = ofOutputBuffer[0];
ofOutputBuffer.erase(ofOutputBuffer.begin());
}
else
{
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing output mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Output Buffer is empty!" << endl;
continue;
}
break;
default:
cout << "Thread " << GetCurrentThreadId() << ": Error acquiring output mutex..." << endl;
continue;
}
if(!ReleaseMutex(oMutex))
cout << "Thread " << GetCurrentThreadId() << ": Error releasing input mutex..." << endl;
//else cout << "Thread " << GetCurrentThreadId() << ": Done reading output buffer, mutex Released!" << endl;
//cout << "Thread " << GetCurrentThreadId() << ": Displaying Image..." << endl;
cvShowImage("Image", &final);
c = cvWaitKey(1);
}
cout << "Thread " << GetCurrentThreadId() << ": Exiting..." << endl;
}
Here is the main function:
void main()
{
hMutex = CreateMutex(NULL, FALSE, NULL);
oMutex = CreateMutex(NULL, FALSE, NULL);
fMutex = CreateMutex(NULL, FALSE, NULL);
hDoneGettingFrame = CreateEvent(NULL, TRUE, FALSE, NULL);
hDoneReadingFrame = CreateEvent(NULL, TRUE, FALSE, NULL);
TName[0]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)writeBuffer, NULL, 0, &ThreadID);
TName[1]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)filterImage, NULL, 0, &ThreadID);
TName[2]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)opticalFlow, NULL, 0, &ThreadID);
TName[3]= CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)displayImage, NULL, 0, &ThreadID);
WaitForMultipleObjects(4, TName, TRUE, INFINITE);
CloseHandle(TName);
}
Semaphores did the trick! Instead of using separate mutexes, I just created a semaphore and let all the threads work through that.
Thanks, it’s running fast and smooth now!
And in the threads…