I am playing with parallel programming and F#. I created a function that integrates a 1-variable function, and then I tried to make it parallel in two different ways:
module Quadrature =
let Integrate (f: double -> double) (x1: double) (x2: double) (samples: int64) =
let step = (x2 - x1) / (double samples)
let samplePoints = seq {x1 + step .. step .. x2 - step}
let sum = samplePoints |> Seq.map (fun x -> f x) |> Seq.sum
let sum = sum + ((f x1) + (f x2)) / 2.0
step * sum
let IntegrateWithStep (f: double -> double) (x1: double) (x2: double) (step: double) =
let samples = (x2 - x1) / step |> round |> int64
Integrate f x1 x2 samples
let IntegrateWithTasks (f: double -> double) (x1: double) (x2: double) (samples: int64) (tasks: int) =
let step = (x2 - x1) / (double samples)
let samplesPerTask = ceil <| (double samples) / (double tasks)
let interval = step * samplesPerTask
let intervals =
seq {
for i in 0 .. (tasks - 1) do
let lowerBound = x1 + (double i) * interval
let upperBound = min (lowerBound + interval) x2
yield (lowerBound, upperBound)
}
let tasks = intervals
|> Seq.map (fun (a, b) -> Task.Factory.StartNew(fun () -> IntegrateWithStep f a b step))
tasks |> Seq.map (fun t -> t.Result) |> Seq.sum
let IntegrateParallel (f: double -> double) (x1: double) (x2: double) (samples: int64) (tasks: int) =
let step = (x2 - x1) / (double samples)
let samplesPerTask = ceil <| (double samples) / (double tasks)
let interval = step * samplesPerTask
let intervals =
[| for i in 0 .. (tasks - 1) do
let lowerBound = x1 + (double i) * interval
let upperBound = min (lowerBound + interval) x2
yield (lowerBound, upperBound) |]
intervals |> Array.Parallel.map (fun (a, b) -> IntegrateWithStep f a b step)
|> Array.sum
I run this code with the following input on a machine with 4 cores:
let f = (fun x -> - 1.0 + 2.0 * x - 3.0 * x * x + 4.0 * x * x * x )
let x1, x2 = 0.0, 1.0
let samples = 100000000L
let tasks = 100
However, the method with the task factory is always slightly slower than the linear one, while the one with the Parallel.map gives me a good speed up.
I have tried varying the number of tasks from thousands down to the number of cores but the implementation with the Task.Factory is always slower than the linear one. What am I doing wrong?
Remember that sequences are lazily loaded. Here is the first time the task gets started:
And you are starting them sequentially and blocking each one for their result (when calling
t.Result. You’ll want to save the list of tasks as an array, and then call.WaitAllbefore collecting the results to ensure they all get started in parallel.Try: