This is my program:
program test
implicit none
integer n,m,k,i,j,Errorflag
real :: Yabs(39,39),angle(39,39)
real ,dimension(67,1) :: deltaA,A
real :: V(1,39),d(1,39),v1(29,1),d1(38,1),Ps(1,38),Qs(1,39),Jac(67,67),invJac(67,67)
real :: B1(1,38),B2(1,29),MF(1,67),trnsMF(67,1),P0(1,39),Q0(1,39)
real, dimension(38,38) :: dia1,offdia1,J1
real, dimension(29,29) :: dia2,dia3,dia4,offdia4,J4
real,dimension(38,29) ::offdia2,J2
real,dimension(29,38) ::offdia3,J3
real p,p1,q,q1
n=39;m=9
MF(1,1)=10
open(unit=3,file="ybus.dat",status="old")
open(unit=4,file="angle.dat",status="old")
do i=1,39
read(3,*) Yabs(i,1:39)
read(4,*)angle(i,1:39)
end do
close(3)
close(4)
open(unit=5,file="activepower.dat",status="old")
open(unit=8,file="reactivepower.dat",status="old")
read(5,*)Ps(1,1:38)
read(8,*)Qs(1,1:29)
close(5)
close(8)
do i=1,67
deltaA(i,1)=0
end do
v1(1:29,1)=1
d1(1:38,1)=0
A(1:38,1)=d1(1:38,1)
A(39:67,1)=v1(1:29,1)
!call cpu_time(t1)
do while(maxval(abs(MF))>0.0001)
V(1,1)=0.982
V(1,2:30)=v1(1:29,1)
V(1,31)=1.03
V(1,32)=0.9831
V(1,33)=1.0123
V(1,34)=0.9972
V(1,35)=1.0493
V(1,36)=1.0635
V(1,37)=1.0278
V(1,38)=1.0265
V(1,39)=1.0475
d(1,1)=0
d(1,2:39)=d1(1:38,1)
! % % % %------Active Power Calculation-----%
p1=0;p=0
do i=2,n
do j=1,n
p1=(V(i)*V(j)*Yabs(i,j)*cos(angle(i,j)-d(i)+d(j)))
p=p1+p
end do
P0(i-1)=p
p=0
end do
! % % % %------Reactive Power Calculation-----%
p=0;p1=0
do i=2,(n-m)
do j=1,n
p1=-(V(i)*V(j)*Yabs(i,j)*sin(angle(i,j)-d(i)+d(j)))
p=p1+p
end do
Q0(i-1)=p
p=0
end do
!!!!!!!!!!!mismatch factor
do i=1,(n-1)
B1(i)=Ps(i)-P0(i)
end do
do i=1,(n-m-1)
B2(i)=Qs(i)-Q0(i)
end do
MF(1,1:38)=B1(1,1:38)
MF(1,39:67)=B2(1,1:29)
!!!!!!!!jacobian calculation for preddictor step
!!!!!!!!!!!!!!!!!!!!!!dia of j1
p=0;p1=0
do i=2,n
do j=1,n
if(j .ne. i)then
p1=V(i)*V(j)*Yabs(i,j)*sin(angle(i,j)-d(i)+d(j))
!print*,p1
p=p1+p
end if
end do
i=i-1
dia1(i,i)=p
p=0
i=i+1
end do
!!!!!!!!!!!!!!off dia. of j1
q=0;q1=0;
do k=2,n
i=k
do j=2,n
if(j .ne. i)then
q1=V(i)*V(j)*Yabs(i,j)*sin(angle(i,j)-d(i)+d(j))
end if
i=i-1;j=j-1
offdia1(i,j)=-q1
q1=0
i=i+1;j=j+1
end do
end do
do i=1,38
do j=1,38
J1(i,j)=offdia1(i,j)+dia1(i,j)
end do
end do
!!!!!!!!!!!!!!!!!!!dia. of j2
p=0;p1=0
do i=2,(n-m)
do j=1,n
if(j .ne. i)then
p1=V(j)*Yabs(i,j)*cos(angle(i,j)-d(i)+d(j))
p=p1+p
end if
end do
dia2(i-1,i-1)=p+(2*V(i)*Yabs(i,i)*cos(angle(i,i)))
p=0;
end do
!!!!!!!!!!!!!!!!!!off dia. of j2
p1=0;
do k=2,n
i=k
do j=2,(n-m)
if(j .ne. i)then
p1=V(i)*Yabs(i,j)*cos(angle(i,j)-d(i)+d(j));
end if
i=i-1;j=j-1
offdia2(i,j)=p1
p1=0;
i=i+1;j=j+1
end do
end do
do i=1,(n-m-1)
offdia2(i,i)=dia2(i,i)
end do
J2=offdia2
!!!!!!!!!!!!!!!!!!!!dia. of j3
p=0;p1=0
do i=2,(n-m)
do j=1,n
if(j .ne. i)then
p1=V(i)*V(j)*Yabs(i,j)*cos(angle(i,j)-d(i)+d(j))
p=p1+p;
end if
end do
i=i-1;
dia3(i,i)=p
p=0;
i=i+1;
end do
!!!!!!!!!!!!!!off dia of j3
p=0;p1=0
do k=2,(n-m)
i=k;
do j=2,n
if(j .ne. i)then
p1=V(i)*V(j)*Yabs(i,j)*cos(angle(i,j)-d(i)+d(j))
end if
i=i-1;j=j-1
offdia3(i,j)=-p1;
p1=0;
i=i+1;j=j+1
end do
end do
do i=1,(n-m-1)
offdia3(i,i)=dia3(i,i)
end do
J3=offdia3
!!!!!!!!!!dia of j4
p=0;p1=0
do i=2,(n-m)
do j=1,n
if(j .ne. i)then
p1=V(j)*Yabs(i,j)*sin(angle(i,j)-d(i)+d(j))
p=p1+p
end if
end do
dia4(i-1,i-1)=-(2*V(i)*Yabs(i,i)*sin(angle(i,i)))-p
p=0;p1=0
end do
!!!!!!!!!!!!!!!off dia of j4
p1=0;p=0
do k=2,(n-m)
i=k;
do j=2,(n-m)
if(j .ne. i)then
p1=V(i)*Yabs(i,j)*sin(angle(i,j)-d(i)+d(j))
end if
i=i-1;j=j-1
offdia4(i,j)=-p1
p1=0;
i=i+1;j=j+1
end do
end do
do i=1,(n-m-1)
offdia4(i,i)=dia4(i,i);
end do
J4=offdia4
!!!!!!!
!!!!!!!!!!!!!!!!!!!formation of final jacobian!!!!!!!!!!
Jac( 1:38, 1:38) = J1 (1:38,1:38)
Jac( 1:38,39:67) = J2 (1:38,1:29)
Jac(39:67, 1:38) = J3 (1:29,1:38)
Jac(39:67,39:67) = J4 (1:29,1:29)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!print*,Jac(23,21)
CALL FindInv(Jac,invJac ,67, ErrorFlag)
trnsMF=transpose(MF)
deltaA=matmul( invJac, trnsMF)
do i=1,67
A(i)=A(i)+deltaA(i)
end do
!!!!!!!!!!!!updating values
do i=1,(n-1)
d1(i)=A(i)
end do
k=0
do i=n,(2*n-2-m)
k=1+k
v1(k)=A(i)
end do
end do
end program test
The array “Ps” contains some values. Now if I increase value of Ps(15) by Ps(15)+1 so for both values can I parallelize this code to get answer quickly.
I am using PGI compiler for CUDA FORTRAN.
Your code is fairly straightforward with lots of independent parallel loops. These parallel loops appear to be wrapped in an outer convergence
do whileloop, so as long as you keep the data on the device for all iterations of the convergence loop, you won’t be bottlenecked by transfers.I would recommend starting with compiler directives for this code rather than diving in to CUDA Fortran. Compiler directives work well for simple independent loops like these — they are simple hints that you place in code comments that tell the compiler which loops to parallelize, which data to copy, etc.
You can first try OpenMP to accelerate to multiple CPU cores. Then you can use GPU directives such as OpenACC, which is going to be available soon in compilers from PGI, Cray, and CAPS. To get a head start, you could download a free trial of the PGI compiler and use their “Accelerator” directives. Accelerator is very similar in syntax to OpenACC.