From 9d70b1f908c84ea68d4da3a0bb99d6fc9bdde46a Mon Sep 17 00:00:00 2001 From: Daniel Howard Date: Thu, 7 May 2026 14:06:42 -0600 Subject: [PATCH 1/3] Optimizations for atm_compute_solve_diagnostics_work --- .../dynamics/mpas_atm_time_integration.F | 75 ++++++++++--------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 238ca7235f..2bd85d89ea 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7119,7 +7119,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Local variables ! integer :: iEdge, iCell, iVertex, k, cell1, cell2, eoe, i, j - real (kind=RKIND) :: h_vertex, r, s + real (kind=RKIND) :: h_vertex, sign_norm real (kind=RKIND) :: r1, r2 logical, parameter :: hollingsworth=.true. @@ -7133,77 +7133,80 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! !$acc parallel default(present) !$acc loop gang - do iEdge=edgeStart,edgeEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - h_edge(k,iEdge) = 0.5 * (h(k,cell1) + h(k,cell2)) - end do + !do iEdge=edgeStart,edgeEnd + ! cell1 = cellsOnEdge(1,iEdge) + ! cell2 = cellsOnEdge(2,iEdge) +!!DIR$ IVDEP + ! ! the first openmp barrier below is set so that ke_edge is computed + ! ! it would be good to move this somewhere else? + ! ! DH - Fused loop vector + ! efac = dcEdge(iEdge)*dvEdge(iEdge) + ! !$acc loop vector + ! do k=1,nVertLevels + ! h_edge(k,iEdge) = 0.5 * (h(k,cell1) + h(k,cell2)) + ! ke_edge(k,iEdge) = efac*u(k,iEdge)**2 + ! end do + !end do -! the first openmp barrier below is set so that ke_edge is computed -! it would be good to move this somewhere else? - - efac = dcEdge(iEdge)*dvEdge(iEdge) - !$acc loop vector +!DIR$ IVDEP + !$acc loop gang vector collapse(2) + do iEdge=edgeStart,edgeEnd do k=1,nVertLevels - ke_edge(k,iEdge) = efac*u(k,iEdge)**2 + h_edge(k,iEdge) = 0.5_RKIND * (h(k,cellsOnEdge(1,iEdge)) + h(k,cellsOnEdge(2,iEdge))) + ke_edge(k,iEdge) = dcEdge(iEdge)*dvEdge(iEdge) * u(k,iEdge)**2 end do - end do - ! ! Compute circulation and relative vorticity at each vertex ! !$acc loop gang do iVertex=vertexStart,vertexEnd + ! DH - Direct asign first iteration without zero initialization + iEdge = edgesOnVertex(1,iVertex) + sign_norm = edgesOnVertex_sign(1,iVertex) * dcEdge(iEdge) * invAreaTriangle(iVertex) +!DIR$ IVDEP !$acc loop vector do k=1,nVertLevels - vorticity(k,iVertex) = 0.0_RKIND + vorticity(k,iVertex) = sign_norm * u(k,iEdge) end do + + ! DH - Folded invAreaTriangle into sign_norm var !$acc loop seq - do i=1,vertexDegree + do i=2,vertexDegree iEdge = edgesOnVertex(i,iVertex) - s = edgesOnVertex_sign(i,iVertex) * dcEdge(iEdge) + sign_norm = edgesOnVertex_sign(i,iVertex) * dcEdge(iEdge) * invAreaTriangle(iVertex) !DIR$ IVDEP !$acc loop vector do k=1,nVertLevels - vorticity(k,iVertex) = vorticity(k,iVertex) + s * u(k,iEdge) + vorticity(k,iVertex) = vorticity(k,iVertex) + sign_norm * u(k,iEdge) end do end do -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - vorticity(k,iVertex) = vorticity(k,iVertex) * invAreaTriangle(iVertex) - end do end do ! ! Compute the divergence at each cell center - ! + ! DH - folded invAreaCell(iCell) into sign_norm !$acc loop gang do iCell=cellStart,cellEnd + ! DH - Direct asign first iteration without zero initialization + iEdge = edgesOnCell(1,iCell) + sign_norm = edgesOnCell_sign(1,iCell) * dvEdge(iEdge) * invAreaCell(iCell) +!DIR$ IVDEP !$acc loop vector do k=1,nVertLevels - divergence(k,iCell) = 0.0_RKIND + divergence(k,iCell) = sign_norm * u(k,iEdge) end do !$acc loop seq - do i=1,nEdgesOnCell(iCell) + do i=2,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) - s = edgesOnCell_sign(i,iCell) * dvEdge(iEdge) + sign_norm = edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invAreaCell(iCell) !DIR$ IVDEP !$acc loop vector do k=1,nVertLevels - divergence(k,iCell) = divergence(k,iCell) + s * u(k,iEdge) + divergence(k,iCell) = divergence(k,iCell) + sign_norm * u(k,iEdge) end do end do - r = invAreaCell(iCell) - !$acc loop vector - do k = 1,nVertLevels - divergence(k,iCell) = divergence(k,iCell) * r - end do end do !$acc end parallel From eae3f2987ff9a81fd946bce22b64019c372e6aaa Mon Sep 17 00:00:00 2001 From: Daniel Howard Date: Thu, 7 May 2026 14:08:55 -0600 Subject: [PATCH 2/3] Restored r variable for other functions --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 2bd85d89ea..68851832e4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7119,7 +7119,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Local variables ! integer :: iEdge, iCell, iVertex, k, cell1, cell2, eoe, i, j - real (kind=RKIND) :: h_vertex, sign_norm + real (kind=RKIND) :: h_vertex, r, sign_norm real (kind=RKIND) :: r1, r2 logical, parameter :: hollingsworth=.true. @@ -7254,7 +7254,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & !$acc parallel default(present) !$acc loop gang do iVertex=vertexStart,vertexEnd - r = 0.25 * invAreaTriangle(iVertex) + r = 0.25_RKIND * invAreaTriangle(iVertex) !$acc loop vector do k=1,nVertLevels @@ -7367,7 +7367,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & do iEdge = edgeStart,edgeEnd !DIR$ IVDEP do k=1,nVertLevels - pv_edge(k,iEdge) = 0.5 * (pv_vertex(k,verticesOnEdge(1,iEdge)) + pv_vertex(k,verticesOnEdge(2,iEdge))) + pv_edge(k,iEdge) = 0.5_RKIND * (pv_vertex(k,verticesOnEdge(1,iEdge)) + pv_vertex(k,verticesOnEdge(2,iEdge))) end do end do !$acc end parallel From f9fad48cc4d15e4c238bdadee5001ce10dfae1e7 Mon Sep 17 00:00:00 2001 From: Daniel Howard Date: Thu, 7 May 2026 14:42:33 -0600 Subject: [PATCH 3/3] Collapsed gang vector loops --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 68851832e4..d888cdc156 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -7132,7 +7132,7 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Compute height on cell edges at velocity locations ! !$acc parallel default(present) - !$acc loop gang + !!$acc loop gang !do iEdge=edgeStart,edgeEnd ! cell1 = cellsOnEdge(1,iEdge) ! cell2 = cellsOnEdge(2,iEdge)