I have a SSD and I am trying to use it to simulate my program I/O performance, however, IOPS calculated from my program is much much faster than IOMeter.
My SSD is PLEXTOR PX-128M3S, by IOMeter, its max 512B random read IOPS is around 94k (queue depth is 32).
However my program (32 windows threads) can reach around 500k 512B IOPS, around 5 times of IOMeter! I did data validation but didn’t find any error in data fetching. It’s because my data fetching in order?
I paste my code belwo (it mainly fetch 512B from file and release it; I did use 4bytes (an int) to validate program logic and didn’t find problem), can anybody help me figure out where I am wrong?
Thanks so much in advance!!
#include <stdio.h>
#include <Windows.h>
//Global variables
long completeIOs = 0;
long completeBytes = 0;
int threadCount = 32;
unsigned long long length = 1073741824; //4G test file
int interval = 1024;
int resultArrayLen = 320000;
int *result = new int[resultArrayLen];
//Method declarison
double GetSecs(void); //Calculate out duration
int InitPool(long long,char*,int); //Initialize test data for testing, if successful, return 1; otherwise, return a non 1 value.
int * FileRead(char * path);
unsigned int DataVerification(int*, int sampleItem); //Verify data fetched from pool
int main()
{
int sampleItem = 0x1;
char * fPath = "G:\\workspace\\4G.bin";
unsigned int invalidIO = 0;
if (InitPool(length,fPath,sampleItem)!= 1)
printf("File write err... \n");
//start do random I/Os from initialized file
double start = GetSecs();
int * fetchResult = FileRead(fPath);
double end = GetSecs();
printf("File read IOPS is %.4f per second.. \n",completeIOs/(end - start));
//start data validation, for 4 bytes fetch only
// invalidIO = DataVerification(fetchResult,sampleItem);
// if (invalidIO !=0)
// {
// printf("Total invalid data fetch IOs are %d", invalidIO);
// }
return 0;
}
int InitPool(long long length, char* path, int sample)
{
printf("Start initializing test data ... \n");
FILE * fp = fopen(path,"wb");
if (fp == NULL)
{
printf("file open err... \n");
exit (-1);
}
else //initialize file for testing
{
fseek(fp,0L,SEEK_SET);
for (int i=0; i<length; i++)
{
fwrite(&sample,sizeof(int),1,fp);
}
fclose(fp);
fp = NULL;
printf("Data initialization is complete...\n");
return 1;
}
}
double GetSecs(void)
{
LARGE_INTEGER frequency;
LARGE_INTEGER start;
if(! QueryPerformanceFrequency(&frequency))
printf("QueryPerformanceFrequency Failed\n");
if(! QueryPerformanceCounter(&start))
printf("QueryPerformanceCounter Failed\n");
return ((double)start.QuadPart/(double)frequency.QuadPart);
}
class input
{
public:
char *path;
int starting;
input (int st, char * filePath):starting(st),path(filePath){}
};
//Workers
DWORD WINAPI FileReadThreadEntry(LPVOID lpThreadParameter)
{
input * in = (input*) lpThreadParameter;
char* path = in->path;
FILE * fp = fopen(path,"rb");
int sPos = in->starting;
// int * result = in->r;
if(fp != NULL)
{
fpos_t pos;
for (int i=0; i<resultArrayLen/threadCount;i++)
{
pos = i * interval;
fsetpos(fp,&pos);
//For 512 bytes fetch each time
unsigned char *c =new unsigned char [512];
if (fread(c,512,1,fp) ==1)
{
InterlockedIncrement(&completeIOs);
delete c;
}
//For 4 bytes fetch each time
/*if (fread(&result[sPos + i],sizeof(int),1,fp) ==1)
{
InterlockedIncrement(&completeIOs);
}*/
else
{
printf("file read err...\n");
exit(-1);
}
}
fclose(fp);
fp = NULL;
}
else
{
printf("File open err... \n");
exit(-1);
}
}
int * FileRead(char * p)
{
printf("Starting reading file ... \n");
HANDLE mWorkThread[256]; //max 256 threads
completeIOs = 0;
int slice = int (resultArrayLen/threadCount);
for(int i = 0; i < threadCount; i++)
{
mWorkThread[i] = CreateThread(
NULL,
0,
FileReadThreadEntry,
(LPVOID)(new input(i*slice,p)),
0,
NULL);
}
WaitForMultipleObjects(threadCount, mWorkThread, TRUE, INFINITE);
printf("File read complete... \n");
return result;
}
unsigned int DataVerification(int* result, int sampleItem)
{
unsigned int invalid = 0;
for (int i=0; i< resultArrayLen/interval;i++)
{
if (result[i]!=sampleItem)
{
invalid ++;
continue;
}
}
return invalid;
}
I didn’t look in enough detail to be certain, but I didn’t see any code there to flush the data to the disk and/or ensure your reads actually came from the disk. That being the case, it appears that what you’re measuring is primarily the performance of the operating system’s disk caching. While the disk might contribute a little to the performance you’re measuring, it’s probably only a small contributor, with other factors dominating.
Since the code is apparently written for Windows, you might consider (for one example) opening the file with CreateFile, and passing the FILE_FLAG_NO_BUFFERING flag when you do so. This will (at least mostly) remove the operating system cache from the equation, and force each read or write to deal directly with the disk itself.