I wrote the algorithm below for a friend who operates a call center and wants to rename his files and move them into directories organized by month and date. However, the call center has over 3.5M files, and the program ran for 12 hours and processed less than 20 GB.
Is there any way to optimize the algorithm below?
// One-off console tool: walks a directory tree of call recordings, looks each
// recording up in the CallCenter database by its numeric file name, and moves it to
//   E:\Ses Dosyalari\{year}\{month}\{time} - {tel} - {agentId}.mp3
// marking the database row as used once the move has succeeded.
class Program
{
    // How much deep to scan. (of course you can also pass it to the method)
    // The root directory counts as level 1.
    const int HowDeepToScan = 20;

    // Root folder that receives the renamed recordings.
    const string TargetRoot = @"E:\Ses Dosyalari";

    // Local CallCenter database, Windows authentication.
    const string ConnectionString = @"Integrated Security=SSPI;Persist Security Info=False;Initial Catalog=CallCenter;Data Source=.";

    static void Main(string[] args)
    {
        ProcessDir(@"E:\Hard Disk 2\", 1);
        Console.WriteLine("Islem Bitmistir");
        Console.ReadLine();
    }

    // Processes the files directly inside sourceDir, then recurses into its
    // subdirectories until HowDeepToScan levels have been visited.
    //   sourceDir    - directory to process.
    //   recursionLvl - depth of sourceDir; the initial call passes 1.
    public static void ProcessDir(string sourceDir, int recursionLvl)
    {
        if (recursionLvl > HowDeepToScan)
        {
            return;
        }

        ChangeDirectories(sourceDir);

        // Recurse into subdirectories of this directory.
        foreach (string subdir in Directory.GetDirectories(sourceDir))
        {
            // Do not iterate through reparse points
            FileAttributes attributes = File.GetAttributes(subdir);
            if ((attributes & FileAttributes.ReparsePoint) == FileAttributes.ReparsePoint)
            {
                continue;
            }

            ProcessDir(subdir + @"\", recursionLvl + 1);
        }
    }

    // Moves every .mp3 file directly inside givenPath whose file name (without
    // extension) is the ID of a ResultTable row into the year/month folder under
    // TargetRoot, renaming it from the row's Time, Tel and AgentID columns, and
    // then sets Used = 1 on that row.
    //
    // Files are left untouched when the name is not an integer, when no row
    // matches, when the row has no Time value, or when the target file already
    // exists.
    //
    // Throws whatever the file system or SqlClient calls throw (for example
    // IOException or SqlException); the connection and commands are released
    // either way.
    public static void ChangeDirectories(string givenPath)
    {
        // Take a snapshot of the directory up front because files are moved out
        // of it while the snapshot is being iterated.
        string[] fileEntries = Directory.GetFiles(givenPath, "*.mp3");
        if (fileEntries.Length == 0)
        {
            // Nothing to look up, so do not open a connection at all.
            return;
        }

        using (SqlConnection callCenterConnection = new SqlConnection(ConnectionString))
        using (SqlCommand selectCommand = new SqlCommand("SELECT TOP 1 ID,Time,Tel,AgentID FROM ResultTable WHERE ID=@id", callCenterConnection))
        using (SqlCommand updateCommand = new SqlCommand("UPDATE ResultTable SET Used = 1 WHERE ID=@id", callCenterConnection))
        {
            // Both commands are created once and reused for every file; only the
            // parameter value changes. This also keeps file names out of the SQL text.
            // NOTE(review): assumes ResultTable.ID is an integer column - confirm.
            SqlParameter selectId = selectCommand.Parameters.Add("@id", SqlDbType.BigInt);
            SqlParameter updateId = updateCommand.Parameters.Add("@id", SqlDbType.BigInt);

            callCenterConnection.Open();

            // Remembers the last folder that was created so CreateDirectory is
            // not called again for every file that lands in the same month.
            string lastTargetDir = null;

            foreach (string fullFileName in fileEntries)
            {
                // The "*.mp3" search pattern can also match longer extensions
                // that merely start with "mp3", so check the extension exactly.
                if (!string.Equals(Path.GetExtension(fullFileName), ".mp3", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // The file name without path and extension is the record ID.
                long id;
                if (!long.TryParse(Path.GetFileNameWithoutExtension(fullFileName), out id))
                {
                    continue;
                }

                object time = null;
                object tel = null;
                object agentId = null;

                selectId.Value = id;
                using (SqlDataReader reader = selectCommand.ExecuteReader())
                {
                    if (!reader.Read())
                    {
                        // No database record for this recording.
                        continue;
                    }

                    time = reader["Time"];
                    tel = reader["Tel"];
                    agentId = reader["AgentID"];
                }
                // The reader is closed here, before the UPDATE below runs on
                // the same connection.

                if (time == DBNull.Value)
                {
                    // Without a timestamp neither the folder nor the name can be built.
                    continue;
                }

                DateTime fileDate = Convert.ToDateTime(time);
                string targetDir = TargetRoot + @"\" + fileDate.Year + @"\" + fileDate.Month;

                // NOTE(review): Convert.ToString formats the timestamp with the
                // current culture, so the resulting file name depends on the
                // machine's regional settings. Kept as-is so names stay
                // consistent with files that were already moved.
                string targetPath = targetDir + @"\" + Convert.ToString(time).Replace(":", ".") + " - " + Convert.ToString(tel) + " - " + Convert.ToString(agentId) + ".mp3";

                if (File.Exists(targetPath))
                {
                    continue;
                }

                if (targetDir != lastTargetDir)
                {
                    // File.Move does not create missing folders.
                    Directory.CreateDirectory(targetDir);
                    lastTargetDir = targetDir;
                }

                File.Move(fullFileName, targetPath);

                // Flag the row only after the file has really been moved.
                updateId.Value = id;
                updateCommand.ExecuteNonQuery();
            }
        }
    }
}
Odds are pretty good that the bottleneck is not your code’s performance characteristics, but the network between the locations.
That said there’s plenty of room for optimization in the second method in particular.
First, always work with data from a database in bulk. Checking for one record’s existence at a time is costly; instead, read all the records at once and put them into a list, then iterate over that list to perform your moves and status updates.
Second, you’re concatenating strings with `+`; use `String.Format` or `String.Concat` instead (or a `StringBuilder` if that’s more to your liking).
Being that the most taxed resource in this operation is the network, I’d suggest using a compression library to wrap up all of the files for a directory (at least), shipping that to the remote site, and having a process unpack it there and send back a completion notification. Sending less is more.