Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists_files.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,9 @@ if (HAVE_AVX2_EXTENSION)
opm/simulators/linalg/mixed/prec.h
opm/simulators/linalg/mixed/vec.h
opm/simulators/linalg/mixed/wrapper.hpp
opm/simulators/linalg/mixed/MatrixWrapper.hpp
opm/simulators/linalg/mixed/PreconditionerWrapper.hpp
opm/simulators/linalg/mixed/SolverAdapter.hpp
)
endif()

Expand Down
12 changes: 11 additions & 1 deletion opm/simulators/linalg/FlexibleSolver_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#if HAVE_AVX2_EXTENSION
#include <opm/simulators/linalg/mixed/wrapper.hpp>
#include <opm/simulators/linalg/mixed/SolverAdapter.hpp>
#endif

#include <dune/common/fmatrix.hh>
Expand Down Expand Up @@ -193,7 +194,7 @@ namespace Dune
OPM_THROW(std::invalid_argument, "mixed-bicgstab solver not supported for single precision.");
} else {
const std::string prec_type = prm.get<std::string>("preconditioner.type", "error");
bool use_mixed_dilu= (prec_type=="mixed-dilu");
bool use_mixed_dilu= (prec_type=="legacy-mixed-dilu");
using MatrixType = decltype(linearoperator_for_solver_->getmat());
linsolver_ = std::make_shared<Dune::MixedSolver<VectorType,MatrixType>>(
linearoperator_for_solver_->getmat(),
Expand All @@ -202,6 +203,15 @@ namespace Dune
use_mixed_dilu
);
}
// mixed-solver adapter starts here
} else if (solver_type == "mixed-precision") {
linsolver_ = std::make_shared<Dune::MixedBiCGSTABSolver<Comm,Operator,VectorType>>(linearoperator_for_solver_,
scalarproduct_,
preconditioner_,
tol, // desired residual reduction factor
maxiter, // maximum number of iterations
verbosity,
comm);
#endif
} else if (solver_type == "loopsolver") {
linsolver_ = std::make_shared<Dune::LoopSolver<VectorType>>(*linearoperator_for_solver_,
Expand Down
10 changes: 10 additions & 0 deletions opm/simulators/linalg/StandardPreconditioners_mpi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include <memory>
#include <type_traits>

#include <opm/simulators/linalg/mixed/PreconditionerWrapper.hpp>

namespace Opm {


Expand Down Expand Up @@ -163,6 +165,14 @@ struct StandardPreconditioners
DUNE_UNUSED_PARAMETER(prm);
return wrapBlockPreconditioner<MultithreadDILU<M, V, V>>(comm, op.getmat());
});
F::addCreator("mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
DUNE_UNUSED_PARAMETER(prm);
return wrapBlockPreconditioner<MixedPreconditioner<M,V,V>>(comm, op.getmat());
});
F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
DUNE_UNUSED_PARAMETER(prm);
return wrapBlockPreconditioner<MixedPreconditioner<M,V,V>>(comm, op.getmat(), true);
});
F::addCreator("jac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const int n = prm.get<int>("repeats", 1);
const double w = prm.get<double>("relaxation", 1.0);
Expand Down
14 changes: 13 additions & 1 deletion opm/simulators/linalg/StandardPreconditioners_serial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
#include <memory>
#include <type_traits>


#include <opm/simulators/linalg/mixed/PreconditionerWrapper.hpp>


namespace Opm {

template <class X, class Y>
Expand Down Expand Up @@ -89,11 +93,19 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation, typen
return std::make_shared<MultithreadDILU<M, V, V>>(op.getmat());
});
F::addCreator("mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
DUNE_UNUSED_PARAMETER(prm);
return std::make_shared<MixedPreconditioner<M,V,V>>(op.getmat());
});
F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
DUNE_UNUSED_PARAMETER(prm);
return std::make_shared<MixedPreconditioner<M,V,V>>(op.getmat(),true);
});
F::addCreator("legacy-mixed-ilu0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
DUNE_UNUSED_PARAMETER(prm);
DUNE_UNUSED_PARAMETER(op);
return std::make_shared<TrivialPreconditioner<V,V>>();
});
F::addCreator("mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("legacy-mixed-dilu", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
DUNE_UNUSED_PARAMETER(prm);
DUNE_UNUSED_PARAMETER(op);
return std::make_shared<TrivialPreconditioner<V,V>>();
Expand Down
97 changes: 97 additions & 0 deletions opm/simulators/linalg/mixed/MatrixWrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#ifndef OPM_MIXED_MATRIX_HEADER_INCLUDED
#define OPM_MIXED_MATRIX_HEADER_INCLUDED


#include <opm/simulators/linalg/mixed/bsr.h>

namespace Opm
{

//! @brief Wraps c-implementation of mixed-precision matrix
//!
//! @note matrix is stored in single precision, but all
//! operations are performed in double-precision
//!
//! @tparam Vector the block-vector used by linear operator
//! @tparam b block size
template <class Vector>
class MixedMatrixWrapper
{
public:

// extract block size
static constexpr auto block_size = Vector::block_type::dimension;

virtual void mv(const Vector& x, Vector& y) const;
virtual void umv(const Vector& x, Vector& y) const;
virtual void usmv(double alpha, const Vector& x, Vector& y) const;

//! @brief constructor
//!
//! @param nrows number of block rows
//! @param nnz number of nonzero blocks
MixedMatrixWrapper(int nrows, int nnz)
{
if constexpr(block_size!=3) OPM_THROW(std::invalid_argument, "MixedMatrixWrapper only supports block size == 3! \n");

nnz_=nnz;
M_ = bsr_alloc();
bsr_init(M_, nrows, nnz, block_size);
}

//! @brief destructor
~MixedMatrixWrapper() {bsr_free(M_);}

void update(double const *data);

int *rowptr(){return M_->rowptr;}
int *colidx(){return M_->colidx;}

private:
int nnz_;
bsr_matrix *M_;
};

template <class Vector>
void MixedMatrixWrapper<Vector>::
mv(const Vector& x, Vector& y) const
{
// mixed-precision block spmv (y = M.x)
bsr_vmspmv3(M_, &x[0][0], &y[0][0]);
}

template <class Vector>
void MixedMatrixWrapper<Vector>::
umv(const Vector& x, Vector& y) const
{
// mixed-precision block spmv with update (y += M.x)
bsr_vmspumv3(M_, &x[0][0], &y[0][0], 1.0);
}

template <class Vector>
void MixedMatrixWrapper<Vector>::
usmv(double alpha, const Vector& x, Vector& y) const
{
// scaled mixed-precision block spmv with update (y += alpha * M.x)
bsr_vmspumv3(M_, &x[0][0], &y[0][0], alpha);
}

template <class Vector>
void MixedMatrixWrapper<Vector>::
update(double const *data)
{
// transpose each dense block to make them column-major
int const b = block_size;
int const bb=b*b;
double B[bb];
for(int k=0;k<nnz_;k++)
{
for(int i=0;i<b;i++) for(int j=0;j<b;j++) B[b*j+i] = data[bb*k + b*i + j];
for(int i=0;i<bb;i++) M_->dbl[bb*k + i] = B[i];
}

// downcast to single precision
bsr_downcast(M_);

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The update of the matrix transposes the blocks of the original matrix into the new buffer, in double precision. Later it downcasts the entire matrix to its single precision buffer. Now, I don't understand why to have the double precision in the first place? After all, the operations in this wrapper only operate on the single precision matrix. Maybe I am missing something here?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your observation is correct, @SoilRos! This is an artifact from the original mixed-precision implementation where the interface was at the solver level. In that case, the double-precision matrix was required for incomplete factorization purposes. Now that the interfaces are at the preconditioner and linear operator levels, the original design has introduced some redundancies. The situation is actually slightly worse for the preconditioner. Addressing the issue is not that difficult, and will be taken care of. Of course, using column-major blocks throughout OPM would eliminate almost all redundancies ;-)

}
} // namespace Opm
#endif // OPM_MIXED_MATRIX_HEADER_INCLUDED
115 changes: 115 additions & 0 deletions opm/simulators/linalg/mixed/PreconditionerWrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#ifndef OPM_MIXED_PREC_HEADER_INCLUDED
#define OPM_MIXED_PREC_HEADER_INCLUDED

#include <opm/simulators/linalg/mixed/prec.h>

namespace Opm {

//! @brief Wraps c-implementation of mixed-precision preconditioner
//!
//! @tparam Vector the block-vector used by linear operator
//! @tparam M block-sparse matrix type
//! @tparam X block-vector input type
//! @tparam Y block vector output type
template <class M, class X, class Y>
class MixedPreconditioner : public Dune::PreconditionerWithUpdate<X, Y>
{
public:

using domain_type = X;
static constexpr auto block_size = domain_type::block_type::dimension;

//! @brief constructor
//!
//! @param A block-sparse matrix
//! @param use_dilu toggle between dilu or ilu0 factorization
MixedPreconditioner(const M& A, bool use_dilu = false)
{
// Access double precision matrix data
int nrows = A.N();
int nnz = A.nonzeroes();
double_data_ = &A[0][0][0][0];
prec_ = prec_alloc();

// allocate and initialize mixed-precision matrix
mixed_matrix_ = bsr_alloc();
bsr_init(mixed_matrix_, nrows, nnz, block_size);

// copy sparsity pattern from double preccision matrix
int *rows = mixed_matrix_->rowptr;
int *cols = mixed_matrix_->colidx;

int irow = 0;
int icol = 0;
rows[0] = 0;
for(auto row=A.begin(); row!=A.end(); row++)
{
for(auto col = row->begin(); col != row->end(); ++col)
{
cols[icol++] = col.index();
}
rows[irow+1] = icol;
irow++;
}

// allocate and initialize preconditioner
prec_init(prec_,mixed_matrix_);

// attribute initialization
nnz_ = nnz;
use_dilu_ = use_dilu;

// perform matrix factorization
update();
};

//! @brief destructor
~MixedPreconditioner()
{
bsr_free(mixed_matrix_);
prec_free(prec_);
}

virtual void update() override;
virtual bool hasPerfectUpdate() const override {return true;}
virtual void pre ([[maybe_unused]] X& x, [[maybe_unused]] Y& y) override {};
virtual void post ([[maybe_unused]] X& x) override {};
virtual void apply ([[maybe_unused]] X& x, [[maybe_unused]] const Y& y) override;
virtual Dune::SolverCategory::Category category() const override { return Dune::SolverCategory::sequential; };

private:
bool use_dilu_;
double const *double_data_;
bsr_matrix *mixed_matrix_;
prec_t *prec_;
int nnz_;
};

template <class M, class X, class Y>
void MixedPreconditioner<M,X,Y>::
update ()
{
// transpose each dense block to make them column-major
int b = block_size;
int bb=b*b;
double B[bb];
for(int k=0;k<nnz_;k++)
{
for(int i=0;i<b;i++) for(int j=0;j<b;j++) B[b*j+i] = double_data_[bb*k + b*i + j];
for(int i=0;i<bb;i++) mixed_matrix_->dbl[bb*k + i] = B[i];
}

use_dilu_ ? prec_dilu_factorize(prec_, mixed_matrix_) : prec_ilu0_factorize(prec_, mixed_matrix_); // choose dilu or ilu0
prec_downcast(prec_);
}

template <class M, class X, class Y>
void MixedPreconditioner<M,X,Y>::
apply ([[maybe_unused]] X& x, [[maybe_unused]] const Y& y)
{
x=y;
prec_mapply3c(prec_,&x[0][0]);
}

}
#endif // OPM_MIXED_PREC_HEADER_INCLUDED
17 changes: 11 additions & 6 deletions opm/simulators/linalg/mixed/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@ and a highly optimized mixed-precision implementation of ILU0 and DILU precondit
bicgstab. Hopefully, this will inspire the exploration of mixed-precision algorithms
in OPM.

The initial implementation is specialized for 3x3 block-sparse matrices due to their
importance in reservoir simulation and restricted to serial runs. Extending the work
to parallel runs is a matter of extending OPM's parallel infrastructure and should be
relatively straight-forward.
The initial implementations are specialized for 3x3 block-sparse matrices due to their
importance in reservoir simulation. The original implementation only works in serial.
The parallel implementation wraps the optimized mixed-precision matrix-vector multiplication
and ILU0/DILU preconditioners into building blocks for the ISTL-based implementation of
bicgstab. This sacrifices some performance for improved modularity. Extending the work to
block-sparse matrices of arbitrary block size is work in progress.

The mixed-precision solver is selected by the command-line options `--linear-solver=mixed-ilu0`
or `--linear-solver=mixed-dilu`. The command-line option `--matrix-add-well-contributions=true`
must also be set as the mixed-precision solver operates directly on block-sparse matrices, not
on linear operators as other OPM solvers do. For convenience, a wrapper similar to the one below
can be used.
on linear operators as other OPM solvers do. For convenience, a wrapper similar to the one
below can be used.

``` bash
OMP_NUM_THREADS=1 mpirun -np 1 --map-by numa --bind-to core build/bin/flow \
Expand All @@ -23,3 +25,6 @@ OMP_NUM_THREADS=1 mpirun -np 1 --map-by numa --bind-to core build/bin/flow \
--linear-solver-max-iter=1024 \
$@
```

To invoke the original serial implementation add the `legacy-` prefix to the mixed-precision
linear solver options.
Loading