I have a two huge text files whose format is as below.
File 1:
ID1,20
ID2,20
ID3,30
File 2:
ID3,75
ID1,84
ID2,70
Both these files contain more than 200,000 rows. I need to read both the files and create a third file in this format:
File 3:
ID1,20,84
ID2,20,70
ID3,30,75
ID can be any string that user inputs. Third file should be created by matching the ID in rows of file 1 to the id in rows of file 2. I have written a code but it takes a lot of time to generate File 3. The task at hand deals with parallelization, so I want the code to save me as much time as possible. Please suggest a faster and more efficient way to deal with this problem.
(Here is the code that I used)
public void positionCure(string afile,string bfile,string dfile)
{
string alphaFilePath = afile;
List<string> alphaFileContent = new List<string>();
using (FileStream fs = new FileStream(alphaFilePath, FileMode.Open))
using(StreamReader rdr = new StreamReader(fs))
{
while(!rdr.EndOfStream)
{
alphaFileContent.Add(rdr.ReadLine());
}
}
string betaFilePath = bfile;
StringBuilder sb = new StringBuilder();
using (FileStream fs = new FileStream(betaFilePath, FileMode.Open))
using (StreamReader rdr = new StreamReader(fs))
{
while(! rdr.EndOfStream)
{
string[] betaFileLine = rdr.ReadLine().Split(Convert.ToChar(","));
foreach (string alphaline in alphaFileContent)
{
string[] alphaFileLine = alphaline.Split(Convert.ToChar(","));
if (alphaFileLine[0].Equals(betaFileLine[0].ToString()))
{
sb.AppendLine(String.Format("{0}, {1}, {2}", betaFileLine[0], betaFileLine[1], alphaline.Substring(alphaline.IndexOf(Convert.ToChar(","))+1)));
}
}
}
}
using (FileStream fs = new FileStream(dfile, FileMode.Create))
using (StreamWriter writer = new StreamWriter(fs))
{
writer.Write(sb.ToString());
}
}
}
I would do something like: