Why does this LINQ query (Id is a property of type long in the Structure object):
// List that the query below searches.
IList<Structure> theStructures = new List<Structure>();
// Counts the entries of theStructures whose ParentStructureId equals aStructure.Id.
// The where clause refers to aStructure.Id itself, so the property getter is invoked
// for each element the query tests rather than once before the query runs.
public int GetChildrenSlow(Structure aStructure){
IEnumerable<Structure> childrenQuery =
from structure in theStructures
where structure.ParentStructureId == aStructure.Id
select structure;
// The query is deferred; Count() is what enumerates theStructures.
int count = childrenQuery.Count();
//Functionality continues...
}
Run slower than this one:
// List that the query below searches.
IList<Structure> theStructures = new List<Structure>();
// Counts the entries of theStructures whose ParentStructureId equals aStructureId.
// The where clause compares against the long parameter directly, so no property
// getter on a Structure argument is involved in the comparison's right-hand side.
public int GetChildrenFast(long aStructureId){
IEnumerable<Structure> childrenQuery =
from structure in theStructures
where structure.ParentStructureId == aStructureId
select structure;
// The query is deferred; Count() is what enumerates theStructures.
int count = childrenQuery.Count();
//Functionality continues...
}
I am making this call thousands of times (recursively), and using the property is much slower than using the long directly. If I pull the Id out and store it in a long variable before I execute the LINQ command, the speed is pretty much equivalent to the speed of GetChildrenFast. Why is using an object property in LINQ slower than using a primitive?
Working Example:
namespace ConsoleApplication1
{
// A node in a hierarchy; nodes refer to their parent by id rather than by reference.
class Structure
{
    // Identifier of this node.
    public int Id { get; set; }

    // Identifier of the node this one hangs under.
    public int ParentStructureId { get; set; }
}
// Timing harness: builds a single chain of Structure nodes and measures two recursive
// descendant counters that differ only in how the parent id reaches the LINQ predicate.
class Program
{
    // Flat list of every node; hierarchy is expressed through ParentStructureId.
    private IList<Structure> theStructures = new List<Structure>();

    // Root node, assigned by BuildStructure.
    public Structure FirstStructure { get; set; }

    // Recursively counts every descendant of the node with the given id.
    // The predicate compares against the long parameter directly.
    private int FastCountStructureChildren(long aStructureId)
    {
        IEnumerable<Structure> directChildren =
            theStructures.Where(candidate => candidate.ParentStructureId == aStructureId);

        // The query is deferred: Count() enumerates it once and the foreach enumerates it again.
        int total = directChildren.Count();
        foreach (Structure child in directChildren)
        {
            total += FastCountStructureChildren(child.Id);
        }

        return total;
    }

    // Recursively counts every descendant of the given node.
    // The predicate reads aStructure.Id itself, so the getter runs for each element tested.
    private int SlowCountStructureChildren(Structure aStructure)
    {
        IEnumerable<Structure> directChildren =
            theStructures.Where(candidate => candidate.ParentStructureId == aStructure.Id);

        // The query is deferred: Count() enumerates it once and the foreach enumerates it again.
        int total = directChildren.Count();
        foreach (Structure child in directChildren)
        {
            total += SlowCountStructureChildren(child);
        }

        return total;
    }

    // Fills theStructures with a root (Id 0, parent -1) followed by a single chain in which
    // each node's parent is the node created just before it.
    public void BuildStructure()
    {
        // The chain stops at 6000 nodes because anything longer causes a
        // StackOverflowException on my development machine.
        const int nodeCount = 6000;

        FirstStructure = new Structure { Id = 0, ParentStructureId = -1 };
        theStructures.Add(FirstStructure);

        for (int id = 1; id < nodeCount; id++)
        {
            theStructures.Add(new Structure { Id = id, ParentStructureId = id - 1 });
        }
    }

    // Builds the chain, times the fast counter and then the slow counter, prints both
    // elapsed times, and waits for Enter before exiting.
    static void Main(string[] args)
    {
        Program program = new Program();
        program.BuildStructure();

        Stopwatch fastStopwatch = Stopwatch.StartNew();
        program.FastCountStructureChildren(0);
        fastStopwatch.Stop();

        Stopwatch slowStopwatch = Stopwatch.StartNew();
        program.SlowCountStructureChildren(program.FirstStructure);
        slowStopwatch.Stop();

        Console.WriteLine("Fast time: " + fastStopwatch.Elapsed);
        Console.WriteLine("Slow time: " + slowStopwatch.Elapsed);
        Console.ReadLine();
    }
}
}
I ran your full example exactly as you provided it.
Only if I run in debug mode is the slow time actually slower. That is because in debug mode methods are never inlined, and there are NOPs littered everywhere to allow you to break, e.g. inside the Id getter.
Since you obviously care about run speed, I’ll point out an unrelated inefficiency: you’re running the query twice: once for count and once for iterating over the children. Running it only once (and increasing count by 1 in the loop) should speed things up.
The way I’d usually solve this problem, by the way, is this: if it ever makes sense to call the `GetChildren` method directly with an id, provide two overloads. Otherwise, provide only the one (`Structure`) overload and read the id into a local variable before the query, as in `long id = aStructure.Id;`.