From 8fe89bbaf4752b7a43a6b2a175b7d18f7edafa3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Tue, 2 Aug 2016 10:57:58 -0600 Subject: [PATCH 01/13] Simplify a loop --- src/rpoisson.f90 | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index bd655ca..249607b 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -34,8 +34,7 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) real(dp) :: V(size(R)) real(dp), dimension(size(R)) :: u1, u2, u1p, u2p -integer :: N, i, it -integer, parameter :: max_it = 2 +integer :: N, i real(dp) :: rho_mid(3) N = size(R) @@ -49,14 +48,14 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4) do i = 4, N-1 - u1(i+1) = u1(i) + adams_extrapolation_outward(u1p, i) u2(i+1) = u2(i) + adams_extrapolation_outward(u2p, i) - do it = 1, max_it - u1p(i+1) = +Rp(i+1) * u2(i+1) - u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) - u1(i+1) = u1(i) + adams_interp_outward(u1p, i) - u2(i+1) = u2(i) + adams_interp_outward(u2p, i) - end do + u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) + u2(i+1) = u2(i) + adams_interp_outward(u2p, i) + u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) + u2(i+1) = u2(i) + adams_interp_outward(u2p, i) + + u1p(i+1) = +Rp(i+1) * u2(i+1) + u1(i+1) = u1(i) + adams_interp_outward(u1p, i) end do V = u1 end function From 3f01994e34c387b026bdb3da3813e98d3f7e5da1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Tue, 2 Aug 2016 11:19:07 -0600 Subject: [PATCH 02/13] Inline adams_*_outwards() --- src/rpoisson.f90 | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index 249607b..ea27e1b 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -6,7 +6,6 @@ module rpoisson use types, only: dp use utils, only: stop_error use constants, only: pi -use ode1d, only: adams_extrapolation_outward, adams_interp_outward use ode1d, only: integrate, get_midpoints, rk4_integrate3 implicit none @@ -48,14 +47,14 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4) do i = 4, N-1 - u2(i+1) = u2(i) + adams_extrapolation_outward(u2p, i) + u2(i+1) = u2(i) + (55*u2p(i) - 59*u2p(i-1) + 37*u2p(i-2) - 9*u2p(i-3)) / 24 u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) - u2(i+1) = u2(i) + adams_interp_outward(u2p, i) + u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) - u2(i+1) = u2(i) + adams_interp_outward(u2p, i) + u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 u1p(i+1) = +Rp(i+1) * u2(i+1) - u1(i+1) = u1(i) + adams_interp_outward(u1p, i) + u1(i+1) = u1(i) + (9*u1p(i+1) + 19*u1p(i) - 5*u1p(i-1) + u1p(i-2)) / 24 end do V = u1 end function From 5f8af96164f56406d3af29e125ff13df96eb4d41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Tue, 2 Aug 2016 11:24:58 -0600 Subject: [PATCH 03/13] Introduce variable A in the expressions --- src/rpoisson.f90 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index ea27e1b..23d8965 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -35,6 +35,7 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) real(dp), dimension(size(R)) :: u1, u2, u1p, u2p integer :: N, i real(dp) :: rho_mid(3) +real(dp) :: A N = size(R) rho_mid = get_midpoints(R(:4), rho(:4)) @@ -47,10 +48,11 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4) do i = 4, N-1 - u2(i+1) = u2(i) + (55*u2p(i) - 59*u2p(i-1) + 37*u2p(i-2) - 9*u2p(i-3)) / 24 - u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) - u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 - u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) + A = u2(i) + (55*u2p(i) - 59*u2p(i-1) + 37*u2p(i-2) - 9*u2p(i-3)) / 24 + A = -Rp(i+1) * (4*pi*rho(i+1) + 2*A/R(i+1)) + A = u2(i) + (9*A + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 + + u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*A/R(i+1)) u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 u1p(i+1) = +Rp(i+1) * u2(i+1) From ad498e9a96dea51a7c9320b5d8c6bd3556c4fe17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Tue, 2 Aug 2016 11:29:02 -0600 Subject: [PATCH 04/13] Combine expressions --- src/rpoisson.f90 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index 23d8965..cfa687d 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -48,9 +48,11 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4) do i = 4, N-1 - A = u2(i) + (55*u2p(i) - 59*u2p(i-1) + 37*u2p(i-2) - 9*u2p(i-3)) / 24 - A = -Rp(i+1) * (4*pi*rho(i+1) + 2*A/R(i+1)) - A = u2(i) + (9*A + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 + A = u2(i) + (9*(& + -Rp(i+1) * (4*pi*rho(i+1) + 2* ( & + u2(i) + (55*u2p(i) - 59*u2p(i-1) + 37*u2p(i-2) - 9*u2p(i-3)) / 24 & + )/R(i+1)) & + ) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*A/R(i+1)) u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 From 68b3c70863238b746a428b25810f1b2a107f3143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Tue, 2 Aug 2016 11:33:41 -0600 Subject: [PATCH 05/13] Rearrange expression --- src/rpoisson.f90 | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index cfa687d..5d6ca07 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -48,11 +48,12 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4) do i = 4, N-1 - A = u2(i) + (9*(& - -Rp(i+1) * (4*pi*rho(i+1) + 2* ( & - u2(i) + (55*u2p(i) - 59*u2p(i-1) + 37*u2p(i-2) - 9*u2p(i-3)) / 24 & - )/R(i+1)) & - ) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 + A = + u2(i) * (1 - 3._dp /4 *Rp(i+1)/R(i+1)) & + + u2p(i) * (19._dp/24 - 55._dp/32*Rp(i+1)/R(i+1)) & + - u2p(i-1) * (5._dp /24 - 59._dp/32*Rp(i+1)/R(i+1)) & + + u2p(i-2) * (1._dp /24 - 37._dp/32*Rp(i+1)/R(i+1)) & + + u2p(i-3) * 9._dp /32*Rp(i+1)/R(i+1) & + - rho(i+1) * 3._dp /2 *Rp(i+1)*pi u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*A/R(i+1)) u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 From 30edb9645c7afcccd6616a8e6c027ea30518ccbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Wed, 3 Aug 2016 21:18:59 -0600 Subject: [PATCH 06/13] Join lines --- src/rpoisson.f90 | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index 5d6ca07..6365a33 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -35,7 +35,7 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) real(dp), dimension(size(R)) :: u1, u2, u1p, u2p integer :: N, i real(dp) :: rho_mid(3) -real(dp) :: A +real(dp) :: RR N = size(R) rho_mid = get_midpoints(R(:4), rho(:4)) @@ -48,14 +48,15 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4) do i = 4, N-1 - A = + u2(i) * (1 - 3._dp /4 *Rp(i+1)/R(i+1)) & - + u2p(i) * (19._dp/24 - 55._dp/32*Rp(i+1)/R(i+1)) & - - u2p(i-1) * (5._dp /24 - 59._dp/32*Rp(i+1)/R(i+1)) & - + u2p(i-2) * (1._dp /24 - 37._dp/32*Rp(i+1)/R(i+1)) & - + u2p(i-3) * 9._dp /32*Rp(i+1)/R(i+1) & - - rho(i+1) * 3._dp /2 *Rp(i+1)*pi - - u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*A/R(i+1)) + RR = Rp(i+1)/R(i+1) + u2p(i+1) = & + - u2(i) * (2 - 3._dp /2 *RR)*RR & + - u2p(i) * (19._dp/12 - 55._dp/16*RR)*RR & + + u2p(i-1) * (5._dp /12 - 59._dp/16*RR)*RR & + - u2p(i-2) * (1._dp /12 - 37._dp/16*RR)*RR & + - u2p(i-3) * 9._dp /16*RR *RR & + - rho(i+1) * (4 - 3 *RR)*Rp(i+1)*pi + u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 u1p(i+1) = +Rp(i+1) * u2(i+1) From fe97bf60d38dccfb7af4755af85c847bf2f0ce0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 4 Aug 2016 14:09:16 -0600 Subject: [PATCH 07/13] Introduce rpoisson_kernel1() --- src/rpoisson.f90 | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index 6365a33..cd0801b 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -15,6 +15,28 @@ module rpoisson contains +subroutine rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p) +real(dp), intent(in) :: R(:), Rp(:), rho(:) +real(dp), intent(inout) :: u1(:), u2(:), u1p(:), u2p(:) +real(dp) :: RR +integer :: i +do i = 4, size(R)-1 + RR = Rp(i+1)/R(i+1) + u2p(i+1) = & + - u2(i) * (2 - 3._dp /2 *RR)*RR & + - u2p(i) * (19._dp/12 - 55._dp/16*RR)*RR & + + u2p(i-1) * (5._dp /12 - 59._dp/16*RR)*RR & + - u2p(i-2) * (1._dp /12 - 37._dp/16*RR)*RR & + - u2p(i-3) * 9._dp /16*RR *RR & + - rho(i+1) * (4 - 3 *RR)*Rp(i+1)*pi + + u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 + + u1p(i+1) = +Rp(i+1) * u2(i+1) + u1(i+1) = u1(i) + (9*u1p(i+1) + 19*u1p(i) - 5*u1p(i-1) + u1p(i-2)) / 24 +end do +end subroutine + function rpoisson_outward_pc(R, Rp, rho) result(V) ! Solves the equation V''(r) + 2/r*V'(r) = -4*pi*rho ! @@ -33,35 +55,16 @@ function rpoisson_outward_pc(R, Rp, rho) result(V) real(dp) :: V(size(R)) real(dp), dimension(size(R)) :: u1, u2, u1p, u2p -integer :: N, i real(dp) :: rho_mid(3) -real(dp) :: RR -N = size(R) rho_mid = get_midpoints(R(:4), rho(:4)) call rpoisson_outward_rk4(rho(:4), rho_mid, R(:4), & 4*pi*integrate(Rp, rho*R), & 0.0_dp, & u1(:4), u2(:4)) - u1p(:4) = u2(:4) * Rp(:4) u2p(:4) = -(4*pi*rho(:4) + 2*u2(:4)/R(:4)) * Rp(:4) - -do i = 4, N-1 - RR = Rp(i+1)/R(i+1) - u2p(i+1) = & - - u2(i) * (2 - 3._dp /2 *RR)*RR & - - u2p(i) * (19._dp/12 - 55._dp/16*RR)*RR & - + u2p(i-1) * (5._dp /12 - 59._dp/16*RR)*RR & - - u2p(i-2) * (1._dp /12 - 37._dp/16*RR)*RR & - - u2p(i-3) * 9._dp /16*RR *RR & - - rho(i+1) * (4 - 3 *RR)*Rp(i+1)*pi - - u2(i+1) = u2(i) + (9*u2p(i+1) + 19*u2p(i) - 5*u2p(i-1) + u2p(i-2)) / 24 - - u1p(i+1) = +Rp(i+1) * u2(i+1) - u1(i+1) = u1(i) + (9*u1p(i+1) + 19*u1p(i) - 5*u1p(i-1) + u1p(i-2)) / 24 -end do +call rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p) V = u1 end function From c008ac3d3d3dca4d3614ac8d866d3368e56fede5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 4 Aug 2016 14:17:01 -0600 Subject: [PATCH 08/13] Add the original kernel back --- src/rpoisson.f90 | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/rpoisson.f90 b/src/rpoisson.f90 index cd0801b..7cc1477 100644 --- a/src/rpoisson.f90 +++ b/src/rpoisson.f90 @@ -6,18 +6,36 @@ module rpoisson use types, only: dp use utils, only: stop_error use constants, only: pi -use ode1d, only: integrate, get_midpoints, rk4_integrate3 +use ode1d, only: integrate, get_midpoints, rk4_integrate3, & + adams_extrapolation_outward, adams_interp_outward implicit none private -public rpoisson_outward_pc +public rpoisson_outward_pc, rpoisson_kernel1, rpoisson_kernel2 contains subroutine rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p) real(dp), intent(in) :: R(:), Rp(:), rho(:) real(dp), intent(inout) :: u1(:), u2(:), u1p(:), u2p(:) +integer, parameter :: max_it=2 +integer :: i, it +do i = 4, size(R)-1 + u1(i+1) = u1(i) + adams_extrapolation_outward(u1p, i) + u2(i+1) = u2(i) + adams_extrapolation_outward(u2p, i) + do it = 1, max_it + u1p(i+1) = +Rp(i+1) * u2(i+1) + u2p(i+1) = -Rp(i+1) * (4*pi*rho(i+1) + 2*u2(i+1)/R(i+1)) + u1(i+1) = u1(i) + adams_interp_outward(u1p, i) + u2(i+1) = u2(i) + adams_interp_outward(u2p, i) + end do +end do +end subroutine + +subroutine rpoisson_kernel2(R, Rp, rho, u1, u2, u1p, u2p) +real(dp), intent(in) :: R(:), Rp(:), rho(:) +real(dp), intent(inout) :: u1(:), u2(:), u1p(:), u2p(:) real(dp) :: RR integer :: i do i = 4, size(R)-1 From 4b53a3ee25aa153d372bd76838a1bb646b4d9439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 4 Aug 2016 14:25:13 -0600 Subject: [PATCH 09/13] Add a bench_poisson benchmark --- tests/CMakeLists.txt | 1 + tests/bench_poisson/CMakeLists.txt | 8 ++++++++ tests/bench_poisson/bench_poisson.f90 | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 tests/bench_poisson/CMakeLists.txt create mode 100644 tests/bench_poisson/bench_poisson.f90 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ff11625..e892d96 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(atom_U) add_subdirectory(oscillator) add_subdirectory(well) add_subdirectory(pseudopotential) +add_subdirectory(bench_poisson) if (WITH_LAPACK) add_subdirectory(double_min) endif () diff --git a/tests/bench_poisson/CMakeLists.txt b/tests/bench_poisson/CMakeLists.txt new file mode 100644 index 0000000..d4df5e7 --- /dev/null +++ b/tests/bench_poisson/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories(${PROJECT_BINARY_DIR}/src) + +project(bench_poisson) + +add_executable(bench_poisson bench_poisson.f90) +target_link_libraries(bench_poisson dftatom) + +add_test(bench_poisson ${PROJECT_BINARY_DIR}/bench_poisson) diff --git a/tests/bench_poisson/bench_poisson.f90 b/tests/bench_poisson/bench_poisson.f90 new file mode 100644 index 0000000..963f401 --- /dev/null +++ b/tests/bench_poisson/bench_poisson.f90 @@ -0,0 +1,18 @@ +program bench_poisson +use types, only: dp +use mesh, only: mesh_exp, mesh_exp_deriv +use rpoisson, only: rpoisson_outward_pc +implicit none + +! Mesh parameters: +real(dp), parameter :: r_min = 1e-7_dp, r_max = 10, a = 2.7e6_dp +integer, parameter :: NN = 5000 + +real(dp) :: R(NN+1) +real(dp), dimension(size(R)) :: Rp, u1, u2, u1p, u2p, rho + +R = mesh_exp(r_min, r_max, a, NN) +Rp = mesh_exp_deriv(r_min, r_max, a, NN) +rho = exp(-R) +u1 = rpoisson_outward_pc(R, Rp, rho) +end program From 6a7c63b541670d2a1ebacb5d52b70414134908ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 4 Aug 2016 14:37:16 -0600 Subject: [PATCH 10/13] Add a test for Poisson kernels --- tests/bench_poisson/CMakeLists.txt | 5 ++- tests/bench_poisson/test_poisson_kernels.f90 | 34 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tests/bench_poisson/test_poisson_kernels.f90 diff --git a/tests/bench_poisson/CMakeLists.txt b/tests/bench_poisson/CMakeLists.txt index d4df5e7..a8bde77 100644 --- a/tests/bench_poisson/CMakeLists.txt +++ b/tests/bench_poisson/CMakeLists.txt @@ -4,5 +4,8 @@ project(bench_poisson) add_executable(bench_poisson bench_poisson.f90) target_link_libraries(bench_poisson dftatom) - add_test(bench_poisson ${PROJECT_BINARY_DIR}/bench_poisson) + +add_executable(test_poisson_kernels test_poisson_kernels.f90) +target_link_libraries(test_poisson_kernels dftatom) +add_test(test_poisson_kernels ${PROJECT_BINARY_DIR}/test_poisson_kernels) diff --git a/tests/bench_poisson/test_poisson_kernels.f90 b/tests/bench_poisson/test_poisson_kernels.f90 new file mode 100644 index 0000000..94d62a6 --- /dev/null +++ b/tests/bench_poisson/test_poisson_kernels.f90 @@ -0,0 +1,34 @@ +program test_poisson_kernels +use utils, only: assert +use types, only: dp +use mesh, only: mesh_exp, mesh_exp_deriv +use rpoisson, only: rpoisson_kernel1, rpoisson_kernel2 +implicit none + +! Mesh parameters: +real(dp), parameter :: r_min = 1e-7_dp, r_max = 10, a = 2.7e6_dp +integer, parameter :: NN = 5000 + +real(dp) :: R(NN+1) +real(dp), dimension(size(R)) :: Rp, rho +real(dp), dimension(size(R), 2) :: u1, u2, u1p, u2p + +R = mesh_exp(r_min, r_max, a, NN) +Rp = mesh_exp_deriv(r_min, r_max, a, NN) +rho = exp(-R) +u1 = 1 +u2 = 1 +u1p = 0 +u2p = 0 +call rpoisson_kernel1(R, Rp, rho, u1(:, 1), u2(:, 1), u1p(:, 1), u2p(:, 1)) +call rpoisson_kernel2(R, Rp, rho, u1(:, 2), u2(:, 2), u1p(:, 2), u2p(:, 2)) +print *, "Errors:" +print *, maxval(abs(u1(:,1)-u1(:,2))) +print *, maxval(abs(u2(:,1)-u2(:,2))) +print *, maxval(abs(u1p(:,1)-u1p(:,2))) +print *, maxval(abs(u2p(:,1)-u2p(:,2))) +call assert(maxval(abs(u1(:,1)-u1(:,2))) < 1e-10_dp) +call assert(maxval(abs(u2(:,1)-u2(:,2))) < 1e-15_dp) +call assert(maxval(abs(u1p(:,1)-u1p(:,2))) < 1e-9_dp) +call assert(maxval(abs(u2p(:,1)-u2p(:,2))) < 1e-16_dp) +end program From d068a2e5655171bcb0abd9d0f8ebde3761dbbb85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 4 Aug 2016 14:37:55 -0600 Subject: [PATCH 11/13] Benchmark the kernels --- tests/bench_poisson/bench_poisson.f90 | 41 ++++++++++++++++++++------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tests/bench_poisson/bench_poisson.f90 b/tests/bench_poisson/bench_poisson.f90 index 963f401..8550663 100644 --- a/tests/bench_poisson/bench_poisson.f90 +++ b/tests/bench_poisson/bench_poisson.f90 @@ -1,18 +1,39 @@ program bench_poisson use types, only: dp use mesh, only: mesh_exp, mesh_exp_deriv -use rpoisson, only: rpoisson_outward_pc +use rpoisson, only: rpoisson_kernel1, rpoisson_kernel2 implicit none -! Mesh parameters: real(dp), parameter :: r_min = 1e-7_dp, r_max = 10, a = 2.7e6_dp -integer, parameter :: NN = 5000 +real(dp), allocatable, dimension(:) :: R, Rp, u1, u2, u1p, u2p, rho +real(dp) :: t1, t2, dt1, dt2 +integer :: N, i, iter -real(dp) :: R(NN+1) -real(dp), dimension(size(R)) :: Rp, u1, u2, u1p, u2p, rho - -R = mesh_exp(r_min, r_max, a, NN) -Rp = mesh_exp_deriv(r_min, r_max, a, NN) -rho = exp(-R) -u1 = rpoisson_outward_pc(R, Rp, rho) +print *, "N iter dt1[s] dt2[s] total_time[s] total_mem[MB]" +do N = 5000, 50000, 5000 + allocate(R(N), Rp(N), u1(N), u2(N), u1p(N), u2p(N), rho(N)) + R = mesh_exp(r_min, r_max, a, N-1) + Rp = mesh_exp_deriv(r_min, r_max, a, N-1) + rho = exp(-R) + u1 = 1 + u2 = 1 + u1p = 0 + u2p = 0 + iter = 4000 * 5000/N + call cpu_time(t1) + do i = 1, iter + call rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p) + end do + call cpu_time(t2) + dt1 = (t2-t1)/iter + call cpu_time(t1) + do i = 1, iter + call rpoisson_kernel2(R, Rp, rho, u1, u2, u1p, u2p) + end do + call cpu_time(t2) + dt2 = (t2-t1)/iter + print "(i6, i6, es12.4, es12.4, f6.2, f6.2)", N, iter, dt1, dt2, dt1*iter, & + N*8*7/1024._dp**2 + deallocate(R, Rp, u1, u2, u1p, u2p, rho) +end do end program From 96f62ffa0639633535d175e15cffde6020891bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 4 Aug 2016 15:19:31 -0600 Subject: [PATCH 12/13] Add the notebook to plot the results --- tests/bench_poisson/Plots.ipynb | 104 ++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 tests/bench_poisson/Plots.ipynb diff --git a/tests/bench_poisson/Plots.ipynb b/tests/bench_poisson/Plots.ipynb new file mode 100644 index 0000000..40b198f --- /dev/null +++ b/tests/bench_poisson/Plots.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%pylab inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "D = loadtxt(\"a.txt\", skiprows=1)\n", + "N = D[:, 0]\n", + "t1 = D[:, 2]\n", + "t2 = D[:, 3]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot(N, t1, label=\"kernel 1\")\n", + "plot(N, t2, label=\"kernel 2\")\n", + "legend(loc=\"upper left\")\n", + "grid()\n", + "xlabel(\"N\")\n", + "ylabel(\"time [s]\")\n", + "title(\"Radial Poisson kernels comparison\")\n", + "show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plot(N, t1/t2)\n", + "grid()\n", + "xlabel(\"N\")\n", + "ylabel(\"Speedup\")\n", + "title(\"Speedup of kernel 2 over kernel 1\")\n", + "show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "freq = 2.6e9*1.2\n", + "plot(N, t1*freq/N, label=\"kernel 1\")\n", + "plot(N, t2*freq/N, label=\"kernel 2\")\n", + "legend()\n", + "grid()\n", + "xlabel(\"N\")\n", + "ylabel(\"Cycles per element\")\n", + "title(\"Comparison in clock cycles per loop body (array element)\")\n", + "ylim([30, None])\n", + "show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 76de81748b422323c71816d2ad871c6b00db53f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 4 Aug 2016 16:25:18 -0600 Subject: [PATCH 13/13] Use a smaller number in order to run faster --- tests/bench_poisson/bench_poisson.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/bench_poisson/bench_poisson.f90 b/tests/bench_poisson/bench_poisson.f90 index 8550663..f730abb 100644 --- a/tests/bench_poisson/bench_poisson.f90 +++ b/tests/bench_poisson/bench_poisson.f90 @@ -19,7 +19,7 @@ program bench_poisson u2 = 1 u1p = 0 u2p = 0 - iter = 4000 * 5000/N + iter = 40 * 5000/N call cpu_time(t1) do i = 1, iter call rpoisson_kernel1(R, Rp, rho, u1, u2, u1p, u2p)