From cea246486fb0f1e63f68f5a43728084335f05e4f Mon Sep 17 00:00:00 2001
From: arturcastiel <arturcastiel@gmail.com>
Date: Thu, 2 Jul 2026 12:54:00 +0000
Subject: [PATCH 1/2] flow: parallel LGR INIT output from gathered simulator
 transmissibilities

A parallel run with LGRs cannot write a correct INIT file: the output
path queries transmissibilities on the global refined grid, which the
coarse per-rank globalTrans_ cannot answer. Fix it by reusing the values
the simulation itself already computed in parallel.

Each rank walks its interior leaf cells and records every connection it
owns from its own (distributed) simulator transmissibilities, keyed by
level-Cartesian indices: same-level connections as (level, min, max),
level-crossing ones as (smaller level, its index, larger level, its
index). The keys are geometrically canonical -- identical on the
distributed grid and the I/O rank's global view -- so the existing
output walk (computeTrans_ / exportNncStructure_) looks the values up
directly; a missing key is a hard error. The records are gathered on
the I/O rank once, with a plain counts/displacements gatherv (new
helper gatherLgrOutputTrans in LgrOutputTransGather.hpp). Every
connection is recorded from one side only: same-level connections from
the interior cell with the smaller level-Cartesian index, level-crossing
connections from the interior cell on the smaller level. Should a key
nevertheless arrive twice, the later record overwrites the earlier one.

The local transmissibilities are finished before the INIT extract in
this branch; finishTransmissibilities() is idempotent, so the later
call is a no-op. Nothing is recomputed on the I/O rank, there is no
global-property switch, and no whole-grid transmissibility object is
stored -- in parallel LGR runs the writer receives none at all. Serial
runs and parallel runs without LGRs are unchanged: with empty gathered
maps the writer queries the whole-grid transmissibility object exactly
as before.
---
 CMakeLists_files.cmake                        |   1 +
 opm/simulators/flow/EclGenericWriter.hpp      |  38 ++++
 opm/simulators/flow/EclGenericWriter_impl.hpp |  73 ++++++-
 opm/simulators/flow/FlowProblemBlackoil.hpp   |  31 ++-
 opm/simulators/flow/LgrOutputTransGather.hpp  | 195 ++++++++++++++++++
 5 files changed, 331 insertions(+), 7 deletions(-)
 create mode 100644 opm/simulators/flow/LgrOutputTransGather.hpp
diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake
index 5186891ce8e..b06ddc31896 100644
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -1036,6 +1036,7 @@ list (APPEND PUBLIC_HEADER_FILES
   opm/simulators/flow/HybridNewtonConfig.hpp
   opm/simulators/flow/InterRegFlows.hpp
   opm/simulators/flow/KeywordValidation.hpp
+  opm/simulators/flow/LgrOutputTransGather.hpp
   opm/simulators/flow/LogOutputHelper.hpp
   opm/simulators/flow/Main.hpp
   opm/simulators/flow/MechContainer.hpp
diff --git a/opm/simulators/flow/EclGenericWriter.hpp b/opm/simulators/flow/EclGenericWriter.hpp
index f91798d312e..2f89c2df306 100644
--- a/opm/simulators/flow/EclGenericWriter.hpp
+++ b/opm/simulators/flow/EclGenericWriter.hpp
@@ -36,6 +36,7 @@
 #include <opm/simulators/flow/Transmissibility.hpp>
 #include <opm/simulators/timestepping/SimulatorReport.hpp>
 
+#include <array>
 #include <map>
 #include <memory>
 #include <optional>
@@ -93,6 +94,24 @@ class EclGenericWriter
         globalTrans_ = globalTrans;
     }
 
+    // Parallel LGR INIT output: connection transmissibilities gathered from the
+    // ranks' own (distributed) simulator transmissibilities. When non-empty,
+    // computeTrans_ / exportNncStructure_ take values from these instead of
+    // querying a whole-grid transmissibility object. Complete on the I/O rank only.
+
+    // Same-level connections, keyed by (level, min level-Cartesian index, max level-Cartesian index).
+    void setGatheredLgrTrans(std::map<std::array<int,3>, double> gatheredTrans)
+    {
+        gatheredLgrTrans_ = std::move(gatheredTrans); // taken by value, moved in; cleared again at the end of writeInit()
+    }
+
+    // Level-crossing connections (global<->LGR, LGR<->LGR NNCs), keyed by
+    // (smaller level, its level-Cartesian index, larger level, its level-Cartesian index).
+    void setGatheredLgrNncTrans(std::map<std::array<int,4>, double> gatheredNncTrans)
+    {
+        gatheredLgrNncTrans_ = std::move(gatheredNncTrans); // taken by value, moved in; cleared again at the end of writeInit()
+    }
+
     void setSubStepReport(const SimulatorReportSingle& report)
     {
         sub_step_report_ = report;
@@ -118,6 +137,23 @@ class EclGenericWriter
     const TransmissibilityType& globalTrans() const;
     unsigned int gridEquilIdxToGridIdx(unsigned int elemIndex) const;
 
+    // Output transmissibility value for a same-level connection: from the gathered
+    // per-rank simulator transmissibilities when available (parallel LGR runs),
+    // otherwise from the whole-grid transmissibility object (c1, c2).
+    double sameLevelOutputTrans_(int level,
+                                 int minLevelCartIdx,
+                                 int maxLevelCartIdx,
+                                 unsigned c1,
+                                 unsigned c2) const;
+
+    // Same, for a level-crossing (NNC) connection.
+    double crossLevelOutputTrans_(int smallerLevel,
+                                  int smallerLevelCartIdx,
+                                  int largerLevel,
+                                  int largerLevelCartIdx,
+                                  unsigned c1,
+                                  unsigned c2) const;
+
     void doWriteOutput(const int                          reportStepNum,
                        const std::optional<int>           timeStepNum,
                        const bool                         isSubStep,
@@ -165,6 +201,8 @@ class EclGenericWriter
     std::unique_ptr<TaskletRunner> taskletRunner_;
     Scalar restartTimeStepSize_;
     const TransmissibilityType* globalTrans_ = nullptr;
+    std::map<std::array<int,3>, double> gatheredLgrTrans_;
+    std::map<std::array<int,4>, double> gatheredLgrNncTrans_;
     const Dune::CartesianIndexMapper<Grid>& cartMapper_;
     const Dune::CartesianIndexMapper<EquilGrid>* equilCartMapper_;
     const EquilGrid* equilGrid_;
diff --git a/opm/simulators/flow/EclGenericWriter_impl.hpp b/opm/simulators/flow/EclGenericWriter_impl.hpp
index dc178d77acb..6acca00e62a 100644
--- a/opm/simulators/flow/EclGenericWriter_impl.hpp
+++ b/opm/simulators/flow/EclGenericWriter_impl.hpp
@@ -66,6 +66,7 @@
 #include <functional>
 #include <map>
 #include <memory>
+#include <stdexcept>
 #include <string>
 #include <unordered_map>
 #include <utility>
@@ -295,6 +296,8 @@ writeInit()
         }
         this->outputTrans_.reset();
     }
+    this->gatheredLgrTrans_.clear();
+    this->gatheredLgrNncTrans_.clear();
 }
 
 template<class Grid, class EquilGrid, class GridView, class ElementMapper, class Scalar>
@@ -574,12 +577,14 @@ computeTrans_(const std::vector<std::unordered_map<int,int>>&  levelCartToLevelC
             }
 
             if (maxLevelCartIdx - minLevelCartIdx == 1 && levelCartDims[0] > 1 ) {
-                outputTrans_->at(level).at("TRANX").template data<double>()[minLevelCartIdx] = globalTrans().transmissibility(c1, c2);
+                outputTrans_->at(level).at("TRANX").template data<double>()[minLevelCartIdx] =
+                    sameLevelOutputTrans_(level, minLevelCartIdx, maxLevelCartIdx, c1, c2);
                 continue; // skip other if clauses as they are false, last one needs some computation
             }
 
             if (maxLevelCartIdx - minLevelCartIdx == levelCartDims[0] && levelCartDims[1] > 1) {
-                outputTrans_->at(level).at("TRANY").template data<double>()[minLevelCartIdx] = globalTrans().transmissibility(c1, c2);
+                outputTrans_->at(level).at("TRANY").template data<double>()[minLevelCartIdx] =
+                    sameLevelOutputTrans_(level, minLevelCartIdx, maxLevelCartIdx, c1, c2);
                 continue; // skipt next if clause as it needs some computation
             }
 
@@ -588,7 +593,8 @@ computeTrans_(const std::vector<std::unordered_map<int,int>>&  levelCartToLevelC
                                          levelCartToLevelCompressed[level],
                                          minLevelCartIdx,
                                          maxLevelCartIdx)) {
-                outputTrans_->at(level).at("TRANZ").template data<double>()[minLevelCartIdx] = globalTrans().transmissibility(c1, c2);
+                outputTrans_->at(level).at("TRANZ").template data<double>()[minLevelCartIdx] =
+                    sameLevelOutputTrans_(level, minLevelCartIdx, maxLevelCartIdx, c1, c2);
             }
         }
     }
@@ -720,7 +726,8 @@ exportNncStructure_(const std::vector<std::unordered_map<int,int>>& levelCartToL
                 const auto& [smallerLevel, smallerLevelCartIdx] = smallerPair;
                 const auto& [largerLevel, largerLevelCartIdx] = largerPair;
 
-                auto t = this->globalTrans().transmissibility(c1, c2);
+                auto t = this->crossLevelOutputTrans_(smallerLevel, smallerLevelCartIdx,
+                                                      largerLevel, largerLevelCartIdx, c1, c2);
 
                 // ECLIPSE ignores NNCs with zero transmissibility
                 // (different threshold than for NNC with corresponding
@@ -781,7 +788,7 @@ exportNncStructure_(const std::vector<std::unordered_map<int,int>>& levelCartToL
                                           levelCartIdxIn, levelCartIdxOut)) {
                     // We need to check whether an NNC for this face was also
                     // specified via the NNC keyword in the deck.
-                    auto t = this->globalTrans().transmissibility(c1, c2);
+                    auto t = this->sameLevelOutputTrans_(level, levelCartIdxIn, levelCartIdxOut, c1, c2);
 
                     if (level == 0) {
                         auto candidate = std::lower_bound(nncData.begin(), nncData.end(),
@@ -879,7 +886,8 @@ exportNncStructure_(const std::vector<std::unordered_map<int,int>>& levelCartToL
                     continue;
                 }
 
-                trans = this->globalTrans().transmissibility(c1, c2);
+                trans = this->sameLevelOutputTrans_(0, static_cast<int>(entry.cell1),
+                                                    static_cast<int>(entry.cell2), c1, c2);
 
                 if (! generatedNnc.empty()) {
                     for (const auto& generated : generatedNnc) {
@@ -1101,6 +1109,59 @@ evalSummary(const int                                            reportStepNum,
 #endif
 }
 
+// Output transmissibility of a same-level connection; throws std::logic_error if gathered values exist but lack it.
+template<class Grid, class EquilGrid, class GridView, class ElementMapper, class Scalar>
+double
+EclGenericWriter<Grid,EquilGrid,GridView,ElementMapper,Scalar>::
+sameLevelOutputTrans_(int level, int minLevelCartIdx, int maxLevelCartIdx,
+                      unsigned c1, unsigned c2) const
+{
+    if (gatheredLgrTrans_.empty()) { // nothing gathered: query the whole-grid object with (c1, c2)
+        return this->globalTrans().transmissibility(c1, c2);
+    }
+    // Gathered keys are stored as (level, min, max): sort the pair so callers may pass either order.
+    if (minLevelCartIdx > maxLevelCartIdx) {
+        std::swap(minLevelCartIdx, maxLevelCartIdx);
+    }
+    const auto candidate = gatheredLgrTrans_.find(std::array{level, minLevelCartIdx, maxLevelCartIdx});
+    if (candidate == gatheredLgrTrans_.end()) {
+        throw std::logic_error {
+            "Gathered LGR transmissibilities: no value for connection (level " + std::to_string(level) +
+            ", " + std::to_string(minLevelCartIdx) + " -> " + std::to_string(maxLevelCartIdx) + ')'
+        };
+    }
+    return candidate->second;
+}
+
+template<class Grid, class EquilGrid, class GridView, class ElementMapper, class Scalar>
+double
+EclGenericWriter<Grid,EquilGrid,GridView,ElementMapper,Scalar>::
+crossLevelOutputTrans_(int smallerLevel, int smallerLevelCartIdx,
+                       int largerLevel, int largerLevelCartIdx,
+                       unsigned c1, unsigned c2) const
+{
+    // The same-level map selects the mode: it is non-empty in every parallel LGR run,
+    // whereas the level-crossing map may legitimately be empty. Testing the latter
+    // would allow a silent fallback for a connection class the gather does cover.
+    if (gatheredLgrTrans_.empty()) {
+        return this->globalTrans().transmissibility(c1, c2);
+    }
+
+    const std::array key{smallerLevel, smallerLevelCartIdx, largerLevel, largerLevelCartIdx};
+    const auto pos = gatheredLgrNncTrans_.find(key);
+    if (pos != gatheredLgrNncTrans_.end()) {
+        return pos->second;
+    }
+
+    // Gathered mode, but no record for this connection: treated as a hard error.
+    throw std::logic_error {
+        "Gathered LGR NNC transmissibilities: no value for connection (level " +
+        std::to_string(smallerLevel) + ", " + std::to_string(smallerLevelCartIdx) +
+        " -> level " + std::to_string(largerLevel) + ", " +
+        std::to_string(largerLevelCartIdx) + ')'
+    };
+}
+
 template<class Grid, class EquilGrid, class GridView, class ElementMapper, class Scalar>
 const typename EclGenericWriter<Grid,EquilGrid,GridView,ElementMapper,Scalar>::TransmissibilityType&
 EclGenericWriter<Grid,EquilGrid,GridView,ElementMapper,Scalar>::
diff --git a/opm/simulators/flow/FlowProblemBlackoil.hpp b/opm/simulators/flow/FlowProblemBlackoil.hpp
index bb5a4348a2a..015f9480d19 100644
--- a/opm/simulators/flow/FlowProblemBlackoil.hpp
+++ b/opm/simulators/flow/FlowProblemBlackoil.hpp
@@ -52,6 +52,7 @@
 #include <opm/simulators/flow/FlowProblem.hpp>
 #include <opm/simulators/flow/FlowProblemBlackoilProperties.hpp>
 #include <opm/simulators/flow/FlowThresholdPressure.hpp>
+#include <opm/simulators/flow/LgrOutputTransGather.hpp>
 #include <opm/simulators/flow/MixingRateControls.hpp>
 #include <opm/simulators/flow/OutputBlackoilModule.hpp>
 #include <opm/simulators/flow/VtkTracerModule.hpp>
@@ -72,6 +73,7 @@
 #include <stdexcept>
 #include <string>
 #include <string_view>
+#include <type_traits>
 #include <vector>
 
 namespace Opm {
@@ -347,8 +349,35 @@ class FlowProblemBlackoil : public FlowProblem<TypeTag>
         // we try to avoid for the parallel running, has both global trans_ and transmissibilities_ allocated at the same time
         if (enableEclOutput_) {
             if (simulator.vanguard().grid().comm().size() > 1) {
-                if (simulator.vanguard().grid().comm().rank() == 0)
+                bool wholeGridTransNeeded = simulator.vanguard().grid().comm().rank() == 0;
+                // Parallel LGR: reuse the simulator's own (distributed) transmissibilities for the
+                // INIT output -- each rank contributes its interior connections, gathered on the
+                // I/O rank and keyed by level-Cartesian indices so the output walk over the global
+                // (equil) grid can look them up directly. This reuses the values already computed
+                // in parallel for the simulation itself instead of recomputing a whole-grid
+                // transmissibility. finishTransmissibilities() is idempotent; calling it here only
+                // moves the (anyway required) local trans build before the INIT write.
+                // grid().maxLevel() is identical on every rank: CpGrid::maxLevel counts the level
+                // grids, one per global LGR, so it is not this rank's partition depth.
+                if constexpr (std::is_same_v<GetPropType<TypeTag, Properties::Grid>, Dune::CpGrid>) {
+                    if (simulator.vanguard().grid().maxLevel() > 0) {
+                        finishTransmissibilities();
+                        const auto& localTrans = simulator.problem().eclTransmissibilities();
+                        auto gathered =
+                            gatherLgrOutputTrans(simulator.vanguard().grid(),
+                                                 simulator.vanguard().gridView(),
+                                                 [&localTrans](unsigned c1, unsigned c2)
+                                                 { return static_cast<double>(localTrans.transmissibility(c1, c2)); });
+                        eclWriter_->setGatheredLgrTrans(std::move(gathered.sameLevel));
+                        eclWriter_->setGatheredLgrNncTrans(std::move(gathered.crossLevel));
+                        // All output values (TRANX/Y/Z and NNC) come from the gathered maps --
+                        // no whole-grid transmissibility object is needed on the I/O rank.
+                        wholeGridTransNeeded = false;
+                    }
+                }
+                if (wholeGridTransNeeded) {
                     eclWriter_->setTransmissibilities(&simulator.vanguard().globalTransmissibility());
+                }
             } else {
                 finishTransmissibilities();
                 eclWriter_->setTransmissibilities(&simulator.problem().eclTransmissibilities());
diff --git a/opm/simulators/flow/LgrOutputTransGather.hpp b/opm/simulators/flow/LgrOutputTransGather.hpp
new file mode 100644
index 00000000000..f9b0ca25651
--- /dev/null
+++ b/opm/simulators/flow/LgrOutputTransGather.hpp
@@ -0,0 +1,195 @@
+// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+// vi: set et ts=4 sw=4 sts=4:
+/*
+  This file is part of the Open Porous Media project (OPM).
+
+  OPM is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 2 of the License, or
+  (at your option) any later version.
+
+  OPM is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
+
+  Consult the COPYING file in the top-level source directory of this
+  module for the precise wording of the license and the list of
+  copyright holders.
+*/
+#ifndef OPM_LGR_OUTPUT_TRANS_GATHER_HPP
+#define OPM_LGR_OUTPUT_TRANS_GATHER_HPP
+
+#include <dune/grid/common/mcmgmapper.hh>
+#include <dune/grid/common/partitionset.hh>
+
+#include <opm/grid/cpgrid/LevelCartesianIndexMapper.hpp>
+
+#include <array>
+#include <cstddef>
+#include <map>
+#include <vector>
+
+namespace Opm {
+
+/// Connection transmissibilities gathered for parallel LGR INIT output (key -> value maps).
+///
+/// sameLevel:  key (level, min level-Cartesian index, max level-Cartesian index)
+///             -- every connection between two cells of the same level grid
+///                (TRANX/TRANY/TRANZ and same-level NNCs).
+/// crossLevel: key (smaller level, its level-Cartesian index,
+///                  larger level, its level-Cartesian index)
+///             -- every connection between cells of different level grids
+///                (global<->LGR and LGR<->LGR NNCs; TRANGL/TRANLL).
+struct GatheredLgrOutputTrans
+{
+    std::map<std::array<int,3>, double> sameLevel;  ///< (level, minCart, maxCart) -> transmissibility
+    std::map<std::array<int,4>, double> crossLevel; ///< (smallLevel, smallCart, largeLevel, largeCart) -> transmissibility
+};
+
+namespace detail {
+
+/// Gather flattened (key, value) records on rank 0 and unpack them into \p gathered there (other
+/// ranks leave it untouched). keys holds N ints per record, values one double per record.
+template <std::size_t N, class Comm>
+void gatherLgrTransRecords(const Comm& comm,
+                           const std::vector<int>& keys,
+                           const std::vector<double>& values,
+                           std::map<std::array<int,N>, double>& gathered)
+{
+    auto unpack = [&gathered](const std::vector<int>& allKeys,
+                              const std::vector<double>& allValues)
+    {
+        for (std::size_t i = 0; i < allValues.size(); ++i) {
+            std::array<int,N> key;
+            for (std::size_t j = 0; j < N; ++j) {
+                key[j] = allKeys[N*i + j]; // record i occupies N consecutive ints
+            }
+            // insert_or_assign rather than try_emplace: should a key arrive more than once,
+            // the record unpacked last overwrites the earlier one instead of being dropped.
+            // Records are unpacked in buffer order.
+            gathered.insert_or_assign(key, allValues[i]);
+        }
+    };
+
+    if (comm.size() == 1) { // single process: the local records are already the complete set
+        unpack(keys, values);
+        return;
+    }
+
+    const int localCount = static_cast<int>(values.size());
+    std::vector<int> counts(comm.size(), 0);
+    comm.gather(&localCount, counts.data(), 1, 0); // root 0 receives every rank's record count
+
+    std::vector<int> valueDispls(comm.size(), 0);
+    std::vector<int> keyCounts(comm.size(), 0);
+    std::vector<int> keyDispls(comm.size(), 0);
+    int totalCount = 0;
+    if (comm.rank() == 0) { // the receive layout is built on the root only
+        for (int r = 0; r < comm.size(); ++r) {
+            valueDispls[r] = totalCount;
+            keyCounts[r] = static_cast<int>(N) * counts[r];
+            keyDispls[r] = static_cast<int>(N) * totalCount;
+            totalCount += counts[r];
+        }
+    }
+
+    std::vector<int> allKeys(comm.rank() == 0 ? N * totalCount : 0);    // receive buffers are
+    std::vector<double> allValues(comm.rank() == 0 ? totalCount : 0);   // sized on the root only
+    comm.gatherv(keys.data(), static_cast<int>(N) * localCount,
+                 allKeys.data(), keyCounts.data(), keyDispls.data(), 0);
+    comm.gatherv(values.data(), localCount,
+                 allValues.data(), counts.data(), valueDispls.data(), 0);
+
+    if (comm.rank() == 0) {
+        unpack(allKeys, allValues);
+    }
+}
+
+} // namespace detail
+
+/// Gather the simulator's own (distributed) transmissibilities for parallel LGR INIT output.
+///
+/// Each rank walks its interior leaf cells and records every connection it owns, keyed by
+/// level-Cartesian indices (see GatheredLgrOutputTrans), then the records are gathered on the
+/// I/O rank (rank 0). The level-Cartesian key is geometrically canonical -- defined by the LGR
+/// specification, identical on the distributed grid and the undistributed (equil) copy -- so
+/// the I/O rank's output walk over the equil grid can look values up directly.
+///
+/// Same-level connections are recorded only from the cell with the smaller level-Cartesian
+/// index, cross-level connections only from the smaller-level cell. As only interior cells are
+/// walked, a connection is contributed by the rank on which that cell is interior.
+///
+/// This reuses the values the simulation itself computed in parallel instead of recomputing a
+/// whole-grid transmissibility on the I/O rank.
+///
+/// \return the complete maps on rank 0; empty maps on all other ranks.
+template <class GridView, class TransFn>
+GatheredLgrOutputTrans
+gatherLgrOutputTrans(const Dune::CpGrid& grid,
+                     const GridView& gridView,
+                     TransFn&& transFn)
+{
+    std::vector<int> sameKeys;      // flattened: level, minCart, maxCart per record
+    std::vector<double> sameValues;
+    std::vector<int> crossKeys;     // flattened: smallLevel, smallCart, largeLevel, largeCart
+    std::vector<double> crossValues;
+
+    const LevelCartesianIndexMapper<Dune::CpGrid> levelCartMapp(grid);
+    const Dune::MultipleCodimMultipleGeomTypeMapper<GridView>
+        elemMapper(gridView, Dune::mcmgElementLayout());
+
+    for (const auto& elem : elements(gridView, Dune::Partitions::interior)) {
+        for (const auto& is : intersections(gridView, elem)) {
+            if (!is.neighbor()) { // no cell on the other side: not a connection
+                continue;
+            }
+
+            const int levelIn = is.inside().level();
+            const int levelOut = is.outside().level();
+
+            if (levelIn != levelOut) {
+                if (levelIn > levelOut) {
+                    continue; // recorded exactly once, from the smaller-level side
+                }
+
+                crossKeys.push_back(levelIn);
+                crossKeys.push_back(levelCartMapp.cartesianIndex(
+                    is.inside().getLevelElem().index(), levelIn));
+                crossKeys.push_back(levelOut);
+                crossKeys.push_back(levelCartMapp.cartesianIndex(
+                    is.outside().getLevelElem().index(), levelOut));
+                crossValues.push_back(transFn(elemMapper.index(is.inside()),
+                                              elemMapper.index(is.outside())));
+                continue;
+            }
+
+            const int cartIn = levelCartMapp.cartesianIndex(
+                is.inside().getLevelElem().index(), levelIn);
+            const int cartOut = levelCartMapp.cartesianIndex(
+                is.outside().getLevelElem().index(), levelIn); // levelOut == levelIn on this path
+
+            if (cartIn > cartOut) {
+                continue; // record each connection once, in canonical direction
+            }
+
+            sameKeys.push_back(levelIn);
+            sameKeys.push_back(cartIn);
+            sameKeys.push_back(cartOut);
+            sameValues.push_back(transFn(elemMapper.index(is.inside()),
+                                         elemMapper.index(is.outside())));
+        }
+    }
+
+    GatheredLgrOutputTrans gathered;
+    detail::gatherLgrTransRecords(grid.comm(), sameKeys, sameValues, gathered.sameLevel);   // both calls are
+    detail::gatherLgrTransRecords(grid.comm(), crossKeys, crossValues, gathered.crossLevel); // made on every rank
+    return gathered;
+}
+
+} // namespace Opm
+
+#endif // OPM_LGR_OUTPUT_TRANS_GATHER_HPP

From 1d926f711d86403956c44a4cc41eeceb434f90ad Mon Sep 17 00:00:00 2001
From: arturcastiel <arturcastiel@gmail.com>
Date: Thu, 2 Jul 2026 12:54:00 +0000
Subject: [PATCH 2/2] tests: reuse parallel regression driver for LGR INIT
 compare

Add a serial-vs-parallel regression for the parallel LGR INIT output
using existing infrastructure:

- run-parallel-regressionTest.sh gains a backward-compatible "-m <mode>"
  flag (default "summary" keeps the current behaviour for every existing
  caller; "init" does a dry run and compares EGRID+INIT only, ignoring
  the parallel-only MPI_RANK keyword).
- add_test_compare_parallel_simulation gains an optional COMPARE_MODE
  parameter; the value "init" forwards "-m init" and names the test
  compareParallelInitSim_<sim>+<filename><postfix>.
- The test is registered against the existing SPE1CASE1_CARFIN deck
  (opm-tests/lgr), which has two 12-host-cell LGRs on a 10x10x3 grid.
  Under the default 4-rank partition the LGR host cells land on
  multiple ranks (rank 0 marks 9 of the 24 host cells during local
  refinement), so the compare exercises rank-boundary-straddling LGRs.

The registration sits before the opm_set_test_driver switch to
run-comparison.sh so it picks up the run-parallel-regressionTest.sh
driver that understands "-m".
---
 compareECLFiles.cmake                | 16 ++++++-
 parallelTests.cmake                  | 27 ++++++++++++
 tests/run-parallel-regressionTest.sh | 63 +++++++++++++++++++++-------
 3 files changed, 89 insertions(+), 17 deletions(-)

diff --git a/compareECLFiles.cmake b/compareECLFiles.cmake
index 3e4103826ae..823b25825ed 100644
--- a/compareECLFiles.cmake
+++ b/compareECLFiles.cmake
@@ -289,6 +289,7 @@ function(add_test_compare_parallel_simulation)
     DIR
     POSTFIX
     MPI_PROCS
+    COMPARE_MODE
   )
   set(multiValueArgs TEST_ARGS)
   cmake_parse_arguments(PARAM "$" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
@@ -313,6 +314,14 @@ function(add_test_compare_parallel_simulation)
     set(PARAM_SIMULATOR ${PARAM_DEV_SIMULATOR})
   endif()
 
+  # COMPARE_MODE "init": dry-run, compares EGRID+INIT only (parallel INIT-file regressions,
+  # e.g. parallel LGR transmissibility). Default (unset): compares SMRY+UNRST as before.
+  if(PARAM_COMPARE_MODE STREQUAL "init")
+    set(TEST_NAME_PREFIX compareParallelInitSim_)
+  else()
+    set(TEST_NAME_PREFIX compareParallelSim_)
+  endif()
+
   if(MPIEXEC_MAX_NUMPROCS GREATER_EQUAL MPI_PROCS)
     # Local computer system has at least ${MPI_PROCS} CPUs. Register test.
     set(RESULT_PATH ${BASE_RESULT_PATH}/parallel/${PARAM_SIMULATOR}+${PARAM_CASENAME})
@@ -325,9 +334,12 @@ function(add_test_compare_parallel_simulation)
                     -t ${PARAM_REL_TOL}
                     -c $<TARGET_FILE:compareECL>
                     -n ${MPI_PROCS})
+    if(PARAM_COMPARE_MODE STREQUAL "init")
+      list(APPEND DRIVER_ARGS -m init)
+    endif()
 
     # Add test that runs flow_mpi and outputs the results to file
-    opm_add_test(compareParallelSim_${PARAM_SIMULATOR}+${PARAM_FILENAME}${PARAM_POSTFIX}
+    opm_add_test(${TEST_NAME_PREFIX}${PARAM_SIMULATOR}+${PARAM_FILENAME}${PARAM_POSTFIX}
       EXE_TARGET
         ${PARAM_SIMULATOR}
       DRIVER_ARGS
@@ -335,7 +347,7 @@ function(add_test_compare_parallel_simulation)
       TEST_ARGS
         ${TEST_ARGS}
     )
-    set_tests_properties(compareParallelSim_${PARAM_SIMULATOR}+${PARAM_FILENAME}${PARAM_POSTFIX}
+    set_tests_properties(${TEST_NAME_PREFIX}${PARAM_SIMULATOR}+${PARAM_FILENAME}${PARAM_POSTFIX}
                          PROPERTIES PROCESSORS ${MPI_PROCS})
   endif()
 endfunction()
diff --git a/parallelTests.cmake b/parallelTests.cmake
index d08be1510e2..f1956f27537 100644
--- a/parallelTests.cmake
+++ b/parallelTests.cmake
@@ -946,6 +946,32 @@ add_test_compare_parallel_simulation(
     --linear-solver-reduction=1e-7
 )
 
+# Parallel LGR INIT/EGRID regression: reuses the existing SPE1CASE1_CARFIN deck (opm-tests/lgr,
+# already registered for spe1case1_carfin/spe1case1_carfin_parallel in compareECLFiles.cmake) and
+# the existing run-parallel-regressionTest.sh driver (COMPARE_MODE init: dry-run, compares
+# EGRID+INIT instead of SMRY+UNRST). Guards the parallel LGR INIT transmissibility output
+# (gatherLgrOutputTrans feeding EclGenericWriter) -- pre-fix, the parallel run deadlocks here.
+add_test_compare_parallel_simulation(
+  CASENAME
+    spe1case1_carfin
+  FILENAME
+    SPE1CASE1_CARFIN
+  SIMULATOR
+    flow
+  DEV_SIMULATOR
+    flow_blackoil
+  DIR
+    lgr
+  ABS_TOL
+    1e-3
+  REL_TOL
+    1e-5
+  COMPARE_MODE
+    init
+  TEST_ARGS
+    --parsing-strictness=low
+)
+
 opm_set_test_driver(${PROJECT_SOURCE_DIR}/tests/run-comparison.sh "")
 
 add_test_compareSeparateECLFiles(
@@ -1011,3 +1037,4 @@ add_test_compareSeparateECLFiles(
     --matrix-add-well-contributions=true
     --linear-solver=ilu0
 )
+
diff --git a/tests/run-parallel-regressionTest.sh b/tests/run-parallel-regressionTest.sh
index bf35fad7f88..c971e5ad51a 100755
--- a/tests/run-parallel-regressionTest.sh
+++ b/tests/run-parallel-regressionTest.sh
@@ -17,13 +17,18 @@ then
   echo -e "\t\t -e <filename> Simulator binary to use"
   echo -e "\tOptional options:"
   echo -e "\t\t -n <procs>    Number of MPI processes to use"
+  echo -e "\t\t -m <mode>     Comparison mode: summary (default, compares SMRY+UNRST"
+  echo -e "\t\t               against a normal run) or init (dry-run, compares"
+  echo -e "\t\t               EGRID+INIT only -- for tracking parallel INIT-file"
+  echo -e "\t\t               regressions, e.g. parallel LGR transmissibility)"
   exit 1
 fi
 
 MPI_PROCS=4
+MODE=summary
 OPTIND=1
 
-while getopts "i:r:f:a:t:c:e:n:" OPT
+while getopts "i:r:f:a:t:c:e:n:m:" OPT
 do
   case "${OPT}" in
     i) INPUT_DATA_PATH=${OPTARG} ;;
@@ -34,38 +39,66 @@ do
     c) COMPARE_ECL_COMMAND=${OPTARG} ;;
     e) EXE_NAME=${OPTARG} ;;
     n) MPI_PROCS=${OPTARG} ;;
+    m) MODE=${OPTARG} ;;
   esac
 done
 shift $(($OPTIND-1))
 TEST_ARGS="$@"
 
+if [ "${MODE}" = "init" ]
+then
+  RUN_FLAG="--enable-dry-run=true --enable-ecl-output=true"
+else
+  RUN_FLAG="--enable-opm-rst-file=true"
+fi
+
 rm -Rf ${RESULT_PATH}
 mkdir -p ${RESULT_PATH}
 cd ${RESULT_PATH}
-"${EXE_NAME}" ${TEST_ARGS} --enable-opm-rst-file=true --output-dir=${RESULT_PATH}
+"${EXE_NAME}" ${TEST_ARGS} ${RUN_FLAG} --output-dir=${RESULT_PATH}
 
 test $? -eq 0 || exit 1
 mkdir mpi
 cd mpi
-mpirun -np ${MPI_PROCS} "${EXE_NAME}" ${TEST_ARGS} --enable-opm-rst-file=true --output-dir=${RESULT_PATH}/mpi
+mpirun -np ${MPI_PROCS} "${EXE_NAME}" ${TEST_ARGS} ${RUN_FLAG} --output-dir=${RESULT_PATH}/mpi
 test $? -eq 0 || exit 1
 cd ..
 
 ecode=0
-echo "=== Executing comparison for summary file ==="
-${COMPARE_ECL_COMMAND} -t SMRY -R ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
-if [ $? -ne 0 ]
+if [ "${MODE}" = "init" ]
 then
-  ecode=1
-  ${COMPARE_ECL_COMMAND} -t SMRY -a -R ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
-fi
+  echo "=== Executing comparison for EGRID file ==="
+  ${COMPARE_ECL_COMMAND} -t EGRID ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  if [ $? -ne 0 ]
+  then
+    ecode=1
+    ${COMPARE_ECL_COMMAND} -a -t EGRID ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  fi
 
-echo "=== Executing comparison for restart file ==="
-${COMPARE_ECL_COMMAND} -l -t UNRST ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
-if [ $? -ne 0 ]
-then
-  ecode=1
-  ${COMPARE_ECL_COMMAND} -a -l -t UNRST ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  # -x ignores the parallel-only MPI_RANK keyword, which has no serial counterpart to compare against.
+  echo "=== Executing comparison for INIT file (ignoring parallel-only MPI_RANK) ==="
+  ${COMPARE_ECL_COMMAND} -t INIT -x ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  if [ $? -ne 0 ]
+  then
+    ecode=1
+    ${COMPARE_ECL_COMMAND} -a -t INIT -x ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  fi
+else
+  echo "=== Executing comparison for summary file ==="
+  ${COMPARE_ECL_COMMAND} -t SMRY -R ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  if [ $? -ne 0 ]
+  then
+    ecode=1
+    ${COMPARE_ECL_COMMAND} -t SMRY -a -R ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  fi
+
+  echo "=== Executing comparison for restart file ==="
+  ${COMPARE_ECL_COMMAND} -l -t UNRST ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  if [ $? -ne 0 ]
+  then
+    ecode=1
+    ${COMPARE_ECL_COMMAND} -a -l -t UNRST ${RESULT_PATH}/${FILENAME} ${RESULT_PATH}/mpi/${FILENAME} ${ABS_TOL} ${REL_TOL}
+  fi
 fi
 
 exit $ecode