From 8f9b26a2905d7e24e9194223584a69534e0211a2 Mon Sep 17 00:00:00 2001
From: Matthias Ernst <maer@fourier.ethz.ch>
Date: Wed, 1 Mar 2023 15:35:01 +0100
Subject: [PATCH 1/5] Added BALS/LAPACK support for matrix inverse inv()

---
 src/Matrix/h_matrix.cc | 112 +++++++++++++++++++++++++++++++++++----
 src/Matrix/n_matrix.cc | 115 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 208 insertions(+), 19 deletions(-)

diff --git a/src/Matrix/h_matrix.cc b/src/Matrix/h_matrix.cc
index aec302f..0149028 100644
--- a/src/Matrix/h_matrix.cc
+++ b/src/Matrix/h_matrix.cc
@@ -80,6 +80,13 @@ extern "C"
                      const int*, __CLPK_doublecomplex*, const int*, double*, int*); 
   extern void zheev_(const char*, const char*, const int*, __CLPK_doublecomplex*, const int*,
                      double*, __CLPK_doublecomplex*, const int*, double*, int*);
+  extern void zgetri_(const int*, __CLPK_doublecomplex *, const int*, const int*, __CLPK_doublecomplex *,
+                      const int*, const int*);
+  extern void zhetri_(const char*, const int*, __CLPK_doublecomplex *, const int*, const int*, __CLPK_doublecomplex *,
+                      const int*);
+  extern void zgetrf_(const int*, const int*, __CLPK_doublecomplex *, const int*, int*, int*);
+  extern void zhetrf_(const char*, const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
+                      const int*, int*);
 }
 #endif
 
@@ -3169,17 +3176,102 @@ void h_matrix::SymDiag(_matrix* (&D), _matrix* (&U)) { tqli(U, D, 1); }
 
 _matrix* h_matrix::inv()
   {
-// sosi - this will use up memory
-  int hd = rows_;			// Matrix dimension
-  i_matrix* I = new i_matrix(hd,hd);	// Construct an Identity matrix
-  int *indx;
-  indx = new int[hd];
-//  int indx[hd];				// Array for any row permutations
-  n_matrix* hLU = (n_matrix*)LU(indx);	// Perform LU decomposition of hmx
-  _matrix* hinvmx = I->LUinv(indx, hLU);
-  delete [] indx;
+#if defined(_USING_SUNPERFLIB_) || defined(_USING_LAPACK_)
+    int nrows = this->rows();
+    int ncols = this->cols();
+    _matrix* hinvmx;
+    if(nrows < 32)  // then do it the old fashioned gamma way.
+    {
+      int hd = rows_;			// Matrix dimension
+      i_matrix* I = new i_matrix(hd,hd);	// Construct an Identity matrix
+      int *indx;
+      indx = new int[hd];
+      n_matrix* hLU = (n_matrix*)LU(indx);	// Perform LU decomposition of hmx
+      hinvmx = I->LUinv(indx, hLU);
+      delete [] indx;
+    }
+    else
+    {
+        hinvmx = new n_matrix(nrows, ncols);
+        this->convert(hinvmx);
+        n_matrix* hinvmx1 = (n_matrix *)hinvmx->transpose();
+#ifdef _USING_SUNPERFLIB_
+        char uplo = 'U';  // Upper triangular...
+        int  N    = nrows;
+        int  lda  = nrows;
+        int *ipiv = new int[N];
+        int info = -55555;
+        zhetrf(uplo, N, (doublecomplex *)hinvmx1->data, lda, ipiv , &info);
+        zhetri(uplo, N, (doublecomplex *)hinvmx1->data, lda, ipiv , &info);
+#endif
+#ifdef _USING_LAPACK_
+        char uplo = 'U';  // Upper triangular...
+#if defined(__LP64__) /* this is needed on MacOS CLAPACK to make types match*/
+        int  N    = nrows;
+        int  lda  = nrows;
+        int *ipiv = new int[N];
+        int lwork = N*N;
+        int info = -55555;
+#else
+        long int  N    = nrows;
+        long int  lda  = nrows;
+        long int *ipiv = new long int[N];
+        long int lwork = N*N;
+        long int info = -55555;
+#endif
+        complex *work= new complex [2*lwork];
+        zhetrf_(&uplo, &N, (__CLPK_doublecomplex *) hinvmx1->data, &lda, ipiv, (__CLPK_doublecomplex *) work, &lwork, &info); 
+        if(info == 0)
+        { // all is well.
+        }
+        else if (info > 0)
+        { std::cerr << "\nh_matrix: zhetrf failed to converge\n";
+        }
+        else if(info == -55555)
+        { std::cerr << "\nReturn value, 'info', does not appear to have been set\n";
+        }
+        else
+        { std::cerr << "\ninfo = " << info << "\n";
+        }
+        zhetri_(&uplo, &N, (__CLPK_doublecomplex *) hinvmx1->data, &lda, ipiv, (__CLPK_doublecomplex *) work, &info); 
+#endif
+        if(info == 0)
+        { // all is well.
+        }
+        else if (info > 0)
+        { std::cerr << "\nh_matrix: Inversion failed to converge\n";
+        }
+        else if(info == -55555)
+        { std::cerr << "\nReturn value, 'info', does not appear to have been set\n";
+        }
+        else
+        { std::cerr << "\ninfo = " << info << "\n";
+        }
+
+        // Copy results back into D and U.
+        complex *hmxp = hinvmx1->data;
+        for(int i=0; i<nrows; i++)
+        { for(int j=0; j<ncols; j++)
+          { hinvmx->put(hmxp[j*nrows+i], i, j);
+          }
+        }
+        delete hinvmx1;
+        delete [] ipiv;
+#ifdef _USING_LAPACK_
+        delete [] work;
+#endif
+//      std::cerr << "h_matrix diag: using sunperflib code\n";
+    } 
+#else
+    int hd = rows_;			// Matrix dimension
+    i_matrix* I = new i_matrix(hd,hd);	// Construct an Identity matrix
+    int *indx;
+    indx = new int[hd];
+    n_matrix* hLU = (n_matrix*)LU(indx);	// Perform LU decomposition of hmx
+    _matrix* hinvmx = I->LUinv(indx, hLU);
+    delete [] indx;
+#endif
   return hinvmx; 
-//  return I->LUinv(indx, hLU);		// Return the inverse via LU*hinv=I;
   }
 
 _matrix* h_matrix::LU(int *indx)
diff --git a/src/Matrix/n_matrix.cc b/src/Matrix/n_matrix.cc
index 7728a38..1ec4ffa 100644
--- a/src/Matrix/n_matrix.cc
+++ b/src/Matrix/n_matrix.cc
@@ -77,6 +77,13 @@ extern "C"
                      const int*, __CLPK_doublecomplex*, const int*, double*, int*);
   extern void zheev_(const char*, const char*, const int*, __CLPK_doublecomplex*, const int*,
                      double*, __CLPK_doublecomplex*, const int*, double*, int*);
+  extern void zgetri_(const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
+                      const int*, int*);
+  extern void zhetri_(const char*, const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
+                      int*);
+  extern void zgetrf_(const int*, const int*, __CLPK_doublecomplex *, const int*, int*, int*);
+  extern void zhetrf_(const char*, const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
+                      int*);
 }
 #endif
 
@@ -2690,22 +2697,112 @@ _matrix* n_matrix::inv()
 
   {
   if(unitary) return adjoint();			// This is real new (sosi)
-  int nd = rows_;				// Matrix dimension
-  if(nd != cols_)
-    {
+  int nrows = rows_;				// Matrix dimension
+  if(nrows != cols_)
+  {
     NMxerror(14,1);				//    Bad rectangular array use
     NMxfatal(25);				//    Cannot take inverse
-    }
+  }
+  #if defined(_USING_SUNPERFLIB_) || defined(_USING_LAPACK_)
+  _matrix *mxinv;
+  if(nrows < 16)  // then do it the old fashioned gamma way.
+  { int *indx;
+    indx = new int[nrows];
+    n_matrix* nLU = new n_matrix(*this);          // Copy input array for LU decomp
+    nLU->LU_decomp(indx);                         // LU decomposition of nLU
+    n_matrix* I = new n_matrix(nrows,nrows,complex0);   // Constuct a new n_matrix
+    for(int i=0; i<nrows; i++) I->put(complex1,i,i); // Make it the identity matrix
+    mxinv = I->LUinv(indx,nLU);          // Back solve for inverse
+    delete nLU;                                   // Clean up the nLU matrix
+    delete I;                                     // Clean up the I matrix
+    delete [] indx;
+  }
+  else
+  {
+        mxinv = new n_matrix(nrows, nrows, complex0);
+        n_matrix* mxinv1 = (n_matrix *)this->transpose();
+
+#ifdef _USING_SUNPERFLIB_
+        int  N    = nrows;
+        int  lda  = nrows;
+        int *ipiv = new int[N];
+        int info = -55555;
+        zgetrf(N, N, (doublecomplex *) mxinv1->data, lda, ipiv, &info);
+        zgetri(N, (doublecomplex *)mxinv1->data, lda, ipiv , &info);
+#endif
+#ifdef _USING_LAPACK_
+#if defined(__LP64__) /* this is needed on MacOS CLAPACK to make types match*/
+        int  N    = nrows;
+        int  lda  = nrows;
+        int *ipiv = new int[N];
+        int info = -55555;
+        int lwork = N*N;
+#else
+        long int  N    = nrows;
+        long int  lda  = nrows;
+        long int *ipiv = new long int[N];
+        long int info = -55555;
+        long int lwork = N*N;
+#endif
+        complex *work= new complex [2*lwork];
+        zgetrf_(&N, &N, (__CLPK_doublecomplex *) mxinv1->data, &lda, ipiv, &info);
+        if(info == 0)
+        { // all is well.
+        }
+        else if (info > 0)
+        { std::cerr << "\nn_matrix: zgetrf failed to converge\n";
+          std::cerr << "info = " << info << "\n";
+        }
+        else if(info == -55555)
+        { std::cerr << "\nReturn value, 'info', does not appear to have been set\n";
+        }
+        else
+        { std::cerr << "\ninfo = " << info << "\n";
+        }
+        zgetri_(&N, (__CLPK_doublecomplex *) mxinv1->data, &lda, ipiv, (__CLPK_doublecomplex *) work, &lwork, &info);
+#endif
+        if(info == 0)
+        { // all is well.
+        }
+        else if (info > 0)
+        { std::cerr << "\nn_matrix: Inversion failed to converge\n";
+          std::cerr << "info = " << info << "\n";
+        }
+        else if(info == -55555)
+        { std::cerr << "\nReturn value, 'info', does not appear to have been set\n";
+        }
+        else
+        { std::cerr << "\ninfo = " << info << "\n";
+        }
+   
+        // Copy results back into D and U.
+        complex *hmxp = mxinv1->data;
+        for(int i=0; i<nrows; i++)
+        { for(int j=0; j<nrows; j++)
+          { mxinv->put(hmxp[j*nrows+i], i, j);
+          }
+        }
+        delete mxinv1;
+        delete [] ipiv;
+#ifdef _USING_LAPACK_
+        delete [] work;
+#endif
+//      std::cerr << "n_matrix inv: using sunperflib code\n";
+
+
+  }
+  #else
   int *indx;
-  indx = new int[nd];
+  indx = new int[nrows];
   n_matrix* nLU = new n_matrix(*this);          // Copy input array for LU decomp
   nLU->LU_decomp(indx);                         // LU decomposition of nLU
-  n_matrix* I = new n_matrix(nd,nd,complex0);   // Constuct a new n_matrix
-  for(int i=0; i<nd; i++) I->put(complex1,i,i); // Make it the identity matrix
+  n_matrix* I = new n_matrix(nrows,nrows,complex0);   // Constuct a new n_matrix
+  for(int i=0; i<nrows; i++) I->put(complex1,i,i); // Make it the identity matrix
   _matrix* mxinv = I->LUinv(indx,nLU);          // Back solve for inverse
   delete nLU;                                   // Clean up the nLU matrix
   delete I;                                     // Clean up the I matrix
   delete [] indx;
+#endif
   return mxinv;                                 // Return the inverse of nmx
   }
 
@@ -4027,7 +4124,7 @@ void n_matrix::diag(_matrix* (&mxd), _matrix* (&mxev))
   else						// Matrix isnt Hermitian
     {
 #if defined(_USING_SUNPERFLIB_) || defined(_USING_LAPACK_)
-    if(nrows < 128)  // then do it the old fashioned gamma way.
+    if(nrows < 32)  // then do it the old fashioned gamma way.
     {
       n_matrix mx2(*this);			// Workspace to diagonalization
       complex *tmp = mx2.corth(0,nrows-1);		// To upper Hessenberg form
@@ -4038,7 +4135,7 @@ void n_matrix::diag(_matrix* (&mxd), _matrix* (&mxev))
         NMxfatal(28);				// Unable to diagonalize
       }						// Almost surely bad input
       delete [] tmp;				// Delete tmp array from corth
-//    std::cerr << "n_matrix diag: using gamma code\n";
+//    std::cerr << "n_matrix diag: using gamma code: dimension = " << nrows << "\n";
     }
     else
     {

From e7c987f3e573915c3e8deecd639758e397f79424 Mon Sep 17 00:00:00 2001
From: Matthias Ernst <maer@ethz.ch>
Date: Thu, 9 Mar 2023 11:57:08 +0100
Subject: [PATCH 2/5] Corrected parsing error in Exch()

---
 src/MultiSys/ExProcess.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/MultiSys/ExProcess.cc b/src/MultiSys/ExProcess.cc
index 332a0c2..c74a11c 100644
--- a/src/MultiSys/ExProcess.cc
+++ b/src/MultiSys/ExProcess.cc
@@ -213,7 +213,7 @@ bool ExchProc::parseExch(string& Exval,
   int len = Exval.length();			// Length of input string
   int nc  = Exval.find("<=>");			// Number of chars before <=>
   string SLHS = Exval.substr(1, nc-1);		// Get string between ( & <=>
-  string SRHS = Exval.substr(nc+3, len-6);	// Get string between <=> & )
+  string SRHS = Exval.substr(nc+3, len-(5+SLHS.length()));	// Get string between <=> & )
   if(!SLHS.length())				// If there isn't anything on
     { 						// the left of <=>, then we don't
     XPerror(33, 1);				// know left hand side components

From 4eeb20f9eb9571aff13439ce077bbabe6c4e3fb4 Mon Sep 17 00:00:00 2001
From: Matthias Ernst <maer@ethz.ch>
Date: Tue, 12 Nov 2024 15:20:51 +0100
Subject: [PATCH 3/5] Added the Pade method for calculating the matrix
 exponential. This is only implemented if a BLAS library isused since a solver
 for linear equations is needed. The Pade method is used if you use expm()
 instead of the standard exp() for matrices or the super operator.

It is not obvious when the exp() function which uses blocking of the
matrix and diagonalization or the expm() function using the Pade method
is faster. This seems to vary strongly.

prop() for operators is still implemented using diagonalization.
---
 src/LSLib/SuperOp.cc   | 120 ++++++++++++++
 src/LSLib/SuperOp.h    |  35 ++++
 src/Matrix/_matrix.cc  |   3 +-
 src/Matrix/_matrix.h   |   1 +
 src/Matrix/d_matrix.cc |   8 +
 src/Matrix/d_matrix.h  |   1 +
 src/Matrix/h_matrix.cc | 345 +++++++++++++++++++++++++++++++++++++++
 src/Matrix/h_matrix.h  |   1 +
 src/Matrix/i_matrix.cc |  16 +-
 src/Matrix/i_matrix.h  |   1 +
 src/Matrix/matrix.cc   |   4 +-
 src/Matrix/matrix.h    |   4 +-
 src/Matrix/n_matrix.cc | 363 ++++++++++++++++++++++++++++++++++++++++-
 src/Matrix/n_matrix.h  |   1 +
 14 files changed, 886 insertions(+), 17 deletions(-)

diff --git a/src/LSLib/SuperOp.cc b/src/LSLib/SuperOp.cc
index a19b4b1..a87a0db 100644
--- a/src/LSLib/SuperOp.cc
+++ b/src/LSLib/SuperOp.cc
@@ -1327,6 +1327,126 @@ super_op exp(const super_op& LOp1, const complex& t)
   return LOp;
   }
 
+// ----------------------------------------------------------------------------
+//                 Exponential Superoperators using Pade Method
+// ----------------------------------------------------------------------------
+
+
+	// Input		LOp   : Superoperator (this)
+        // Return		ExpLOp: Exponential of LOp
+	//				ExpLOp = exp(LOp) (Pade method)
+        // Note			      : Exponential output in same base as LOp
+
+super_op super_op::expm() const
+  {
+  if(!HSp)				// Check for NULL LOp
+    {
+    LOperror(5, "exp", 1);		// Error during LOp exp function
+    LOpfatal(7);			// Accessing Null LOp
+    }
+  super_op ExpLOp(*this);		// Copy LOp in its current base
+  matrix mx1 = this->mx;
+  
+  ExpLOp.mx = mx1.expm();
+  
+  return ExpLOp;
+  }
+
+
+super_op super_op::expm(const complex& t, double cutoff) const
+
+        // Input                LOp   : Superoperator (this)
+        //                      t     : Exponential factor
+        //                      cutoff: Exponential factor roundoff
+        // Return               ExpLOp: Exponential of LOp
+        //                              ExpLOp = exp(t*LOp)
+        // Note                       : Exponential output in EBR of LOp
+        // Note                       : L0p's EBR is generated herein
+        // Note                       : Value of t is considered 0 if
+        //                              it's magnituded is less than cutoff
+
+  {
+  if(!HSp)				// Check for NULL LOp
+    {
+    LOperror(5, "exp", 1);		// Error during LOp exp function
+    LOpfatal(7);			// Accessing NULL Lop
+    }
+  super_op ExpLOp(*this);		// Copy LOp in its current base
+  if(norm(t) < fabs(cutoff))
+    {
+    matrix mx(LSp, LSp, i_matrix_type);	// ExpLOp is the identity superop
+    ExpLOp.mx = mx;
+    }
+  else
+    {
+    matrix mx1 = this->mx*t;
+    ExpLOp.mx = mx1.expm();
+    }
+  return ExpLOp;
+  }
+
+  
+
+
+	// Input		LOp1  : Superoperator
+        // Return		LOp   : Exponential of LOp1
+	//				LOp = exp(LOp1)
+        // Note			      : Computed in EBR of LOp1
+        // Note			      : Superoperator output in EBR of LOp1
+
+super_op expm(const super_op& LOp1)
+  {
+  super_op LOp = LOp1;
+  if(LOp1.HSp)				// Check for NULL LOp
+    {
+    matrix m1 = LOp.get_mx();
+    LOp.put_mx(m1.expm());
+    return LOp;
+    }
+  else
+    {
+    LOp1.LOperror(5, "exp", 1);	// Error during LOp exp function
+    LOp1.LOpfatal(7);		// Accessing Null LOp
+    }
+  return LOp;
+  }
+
+  
+super_op expm(const super_op& LOp1, const complex& t)
+//return LOp(matrix(LOp1.LSp, LOp1.LSp, d_matrix_type));
+
+	// Input		LOp1  : Superoperator
+	//			t     : A time
+        // Return		LOp   : Exponential of LOp1
+	//				LOp = exp(LOp1*t)
+        // Note			      : Computed in EBR of LOp1
+
+  {
+  super_op LOp(matrix(LOp1.LSp, LOp1.LSp, d_matrix_type));
+  if(LOp.HSp) 				// Check for NULL LOp
+    {
+    if(t != complex0)
+      {
+      matrix m1 = LOp.get_mx()*t;
+      LOp.put_mx(m1.expm());
+      return LOp;
+      }
+    else
+      {
+      matrix mx(LOp1.LSp, LOp1.LSp, i_matrix_type);	// LOp is the identity superoperator
+      LOp.mx = mx;
+      }
+    LOp.Hbs = LOp1.Hbs;			// LOp Hilbert space basis same as LOp1's
+    LOp.Lbs = LOp1.Lbs;			// LOp Liouville space basis same as LOp1's
+    }
+  else
+    {
+    LOp1.LOperror(5, "exp", 1);	// Error during LOp exp function
+    LOp1.LOpfatal(7);		// Accessing Null LOp
+    }
+  return LOp;
+  }
+
 
 // ----------------------------------------------------------------------------
 //                      Superoperators Taken To A Power
diff --git a/src/LSLib/SuperOp.h b/src/LSLib/SuperOp.h
index a6bca2f..45ec76c 100644
--- a/src/LSLib/SuperOp.h
+++ b/src/LSLib/SuperOp.h
@@ -398,6 +398,41 @@ MSVCDLL friend super_op exp(const super_op& LOp1, const complex& t);
 	//				LOp = exp(LOp1*t)
         // Note			      : Computed in EBR of LOp1
   
+MSVCDLL super_op expm() const;
+
+        // Input                LOp   : Superoperator (this)
+        // Return               ExpLOp: Exponential of LOp
+        //                              ExpLOp = exp(LOp) (Pade method)
+        // Note                       : Exponential output in same base as LOp
+
+
+MSVCDLL super_op expm(const complex& t, double cutoff=1.e-12) const;
+
+        // Input                LOp   : Superoperator (this)
+	//			t     : Exponential factor
+	//			cutoff: Exponential factor roundoff
+        // Return               ExpLOp: Exponential of LOp
+        //                              ExpLOp = exp(t*LOp) (Pade method)
+        // Note                       : Exponential output in same base as LOp
+	// Note			      : Value of t is considered 0 if
+	//				it's magnituded is less than cutoff
+
+  
+MSVCDLL friend super_op expm(const super_op& LOp1);
+
+	// Input		LOp1  : Superoperator
+        // Return		LOp   : Exponential of LOp1
+	//				LOp = exp(LOp1) (Pade method)
+        // Note			      : Computed in same base as LOp1
+
+  
+MSVCDLL friend super_op expm(const super_op& LOp1, const complex& t);
+
+	// Input		LOp1  : Superoperator
+        // Return		LOp   : Exponential of LOp1
+	//				LOp = exp(LOp1*t) (Pade method)
+        // Note			      : Computed in same base as LOp1
+  
 
 MSVCDLL friend super_op pow(const super_op& LOp, int power);
 
diff --git a/src/Matrix/_matrix.cc b/src/Matrix/_matrix.cc
index 9c0629b..2cde493 100644
--- a/src/Matrix/_matrix.cc
+++ b/src/Matrix/_matrix.cc
@@ -376,7 +376,8 @@ _matrix* _matrix::IM()        {_MxFatal(5, "Imaginary");         return this; }
 _matrix* _matrix::conjugate() {_MxFatal(5, "Complex Conjugate"); return this; }
 _matrix* _matrix::transpose() {_MxFatal(5, "Transpose");         return this; }
 _matrix* _matrix::adjoint()   {_MxFatal(5, "Adjoint");           return this; }
-_matrix* _matrix::mxexp()     {_MxFatal(5, "Exponential");       return this; }
+_matrix* _matrix::mxexp()     {_MxFatal(5, "Exponential Diag");  return this; }
+_matrix* _matrix::mxpade()    {_MxFatal(5, "Exponential Pade");  return this; }
 complex  _matrix::trace()     {_MxFatal(5, "Trace");         return complex0; }
 
 _matrix* _matrix::swaprows(int i,int j) {_MxFatal(5,"swaprows"); return this;}
diff --git a/src/Matrix/_matrix.h b/src/Matrix/_matrix.h
index 5ceeb73..04bcbab 100644
--- a/src/Matrix/_matrix.h
+++ b/src/Matrix/_matrix.h
@@ -340,6 +340,7 @@ virtual _matrix* IM();
 virtual _matrix* conjugate();
 virtual _matrix* transpose();
 virtual _matrix* mxexp();
+virtual _matrix* mxpade();
 virtual _matrix* adjoint();
 virtual complex  trace();
 
diff --git a/src/Matrix/d_matrix.cc b/src/Matrix/d_matrix.cc
index defa3c0..8fb0a95 100644
--- a/src/Matrix/d_matrix.cc
+++ b/src/Matrix/d_matrix.cc
@@ -1260,6 +1260,14 @@ _matrix* d_matrix::mxexp()
   return mx;
   }
 
+_matrix* d_matrix::mxpade()
+  {
+  d_matrix* mx = new d_matrix(cols_, rows_);	// If complex, construct new d_matrix
+  for(int i=0; i<rows_; i++)			// Loop over all non_zero elements
+    mx->data[i] = exp(data[i]);			// <i|mx|i> = exp(<i|dmx*|i>)
+  return mx;
+  }
+
 complex d_matrix::trace()
   {
   complex z(0);
diff --git a/src/Matrix/d_matrix.h b/src/Matrix/d_matrix.h
index 5827e98..6928815 100644
--- a/src/Matrix/d_matrix.h
+++ b/src/Matrix/d_matrix.h
@@ -297,6 +297,7 @@ virtual _matrix* conjugate();
 virtual _matrix* transpose();
 virtual _matrix* adjoint();
 virtual _matrix* mxexp();
+virtual _matrix* mxpade();
 virtual complex  trace();
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/h_matrix.cc b/src/Matrix/h_matrix.cc
index 0149028..1b70f33 100644
--- a/src/Matrix/h_matrix.cc
+++ b/src/Matrix/h_matrix.cc
@@ -87,6 +87,9 @@ extern "C"
   extern void zgetrf_(const int*, const int*, __CLPK_doublecomplex *, const int*, int*, int*);
   extern void zhetrf_(const char*, const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
                       const int*, int*);
+  extern void zgesv_(const int*, const int*, __CLPK_doublecomplex *, const int*, 
+                      int*, __CLPK_doublecomplex*, const int*, int*);
+
 }
 #endif
 
@@ -1459,6 +1462,348 @@ _matrix* h_matrix::transpose()
 
 _matrix* h_matrix::adjoint() { return this; }
   
+
+/*  Matrix exponential using the Pade method. This is a lot faster than the
+    standard exp() function. Uses the same algorithm as MATLAB expm() and is
+    roughly as fast as the MATLAB implementation. 
+
+    Higham, N. J., The Scaling and Squaring Method for the Matrix Exponential Revisited,
+    SIAM J. Matrix Anal. Appl., 26(4) (2005), pp. 1179-1193.
+    Al-Mohy, A. H. and N. J. Higham, A new scaling and squaring algorithm for the matrix
+    exponential, SIAM J. Matrix Anal. Appl., 31(3) (2009), pp. 970-989.
+
+    This seems to be the fastest method currently.
+
+*/
+
+_matrix* h_matrix::mxpade()
+
+{ 
+#ifdef _USING_BLAS_
+
+// Pade needs a linear equation solver so it will only work if the BLAS libraries 
+// are installed. Otherwise we will just use the standard mxexp() which uses diagonalization.
+
+  double L1, n_squ, temp, maxnorm;
+  int k1;
+
+  if(rows_ != cols_)
+  { HMxerror(14,1);			//    Bad rectangular array use
+    HMxfatal(28);			//    Unable to diagonalize
+  }
+//std::cerr << "At Point 01 h_matrix.cc\n";
+  L1=0;
+  for(k1=0;k1<size;++k1)
+  { temp = norm(data[k1]);
+    if(temp > L1)
+      L1=temp;
+  }
+  _matrix *U;
+  _matrix *V;
+  _matrix *temp1;
+  _matrix *temp2;
+  _matrix *temp3;
+  n_squ=0;
+  _matrix* ident = new i_matrix(rows_,rows_);
+  _matrix* A2=this->multiply(this);
+//std::cerr << "At Point 02\n";
+  if(L1 < 1.495585217958292e-2)
+  { const double b[]={120.0, 60.0, 12.0, 1.0};
+//  std::cerr << "At Point 02 - 01\n";
+    temp1 = ident->multiply(b[1]);
+    temp2 = A2->multiply(b[3]);
+    temp3=temp1->add(temp2);
+    delete temp1;
+//  delete temp2;							// Do not delete temp12since b[3]=1 and the same buffer is returned
+    U = this->multiply(temp3);
+    delete temp3;
+//  U = this*(b[3]*A2+b[1]*ident);
+    temp1 = ident->multiply(b[0]);
+    temp2 = A2->multiply(b[2]);
+    V = temp1->add(temp2);
+    delete temp1;
+    delete temp2;
+//  V = b[2]*A2+b[0]*ident;
+  }
+  else
+  { _matrix *A4=A2->multiply(A2);
+    if(L1 < 2.539398330063230e-1)
+    { const double b[]={30240., 15120., 3360., 420., 30., 1.};
+//    std::cerr << "At Point 02 - 02\n";
+      temp1 = ident->multiply(b[1]);
+      temp2 = A2->multiply(b[3]);
+      temp3=temp1->add(temp2);
+      delete temp1;
+      delete temp2;
+      temp1 = A4->multiply(b[5]);
+      temp2=temp1->add(temp3);
+//    delete temp1;							// Do not delete temp1 since b[5]=1 and the same buffer is returned
+      delete temp3;
+      U = this->multiply(temp2);
+      delete temp2;
+//    U = this*(b[5]*A4+b[3]*A2+b[1]*ident);
+      temp1 = ident->multiply(b[0]);
+      temp2 = A2->multiply(b[2]);
+      temp3=temp1->add(temp2);
+      delete temp1;
+      delete temp2;
+      temp1 = A4->multiply(b[4]);
+      V=temp1->add(temp3);
+      delete temp1;
+      delete temp3;
+//    V = b[4]*A4+b[2]*A2+b[0]*ident;
+    }
+    else
+    { _matrix* A6=A4->multiply(A2);
+      if(L1 < 9.504178996162932e-1)
+      { const double b[]={17297280., 8648640., 1995840., 277200., 25200., 1512., 56., 1.};
+//      std::cerr << "At Point 02 - 03\n";
+        temp1 = ident->multiply(b[1]);
+        temp2 = A2->multiply(b[3]);
+        temp3=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+        temp1 = A4->multiply(b[5]);
+        temp2=temp1->add(temp3);
+	delete temp1;
+	delete temp3;
+        temp1 = A6->multiply(b[7]);
+        temp3=temp1->add(temp2);
+//      delete temp1;							// Do not delete temp1 since b[7]=1 and the same buffer is returned
+	delete temp2;
+        U = this->multiply(temp3);
+	delete temp3;
+//      U = this*(b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+        temp1 = ident->multiply(b[0]);
+        temp2 = A2->multiply(b[2]);
+        temp3=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+        temp1 = A4->multiply(b[4]);
+        temp2=temp1->add(temp3);
+	delete temp1;
+	delete temp3;
+        temp1 = A6->multiply(b[6]);
+        V=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+//      V = b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+      }
+      else
+      { _matrix* A8=A6->multiply(A2);
+        if(L1 < 2.097847961257068e0)
+        { const double b[]={17643225600., 8821612800., 2075673600., 302702400., 30270240.,
+	                    2162160., 110880., 3960., 90., 1.};
+//        std::cerr << "At Point 02 - 04\n";
+          temp1 = ident->multiply(b[1]);
+          temp2 = A2->multiply(b[3]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A4->multiply(b[5]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp1 = A6->multiply(b[7]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A8->multiply(b[9]);
+          temp2=temp1->add(temp3);
+//        delete temp1;								// Do not delete temp1 since b[9]=1 and the same buffer is returned
+	  delete temp3;
+          U = this->multiply(temp2);
+	  delete temp2;
+//        U = this*(b[9]*A8+b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+          temp1 = ident->multiply(b[0]);
+          temp2 = A2->multiply(b[2]);
+          temp3 = temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A4->multiply(b[4]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp1 = A6->multiply(b[6]);
+          temp3 = temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A8->multiply(b[8]);
+          V=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+//        V = b[8]*A8+b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+	}
+        else
+        { maxnorm = 5.371920351148152e0;
+          n_squ = std::max(0, int(std::ceil(log2(L1/maxnorm))));
+//        std::cerr << "n_squ = " << n_squ << "\n";
+          const double b[]={64764752532480000., 32382376266240000., 7771770303897600.,
+                            1187353796428800., 129060195264000., 10559470521600.,
+                            670442572800., 33522128640., 1323241920., 40840800., 960960.,
+                            16380., 182., 1.};
+//        std::cerr << "At Point 02 - 05\n";
+	  A2=A2->multiply_two(std::pow(2.0,-n_squ*2));
+	  A4=A4->multiply_two(std::pow(2.0,-n_squ*4));
+	  A6=A6->multiply_two(std::pow(2.0,-n_squ*6));
+          temp1 = A2->multiply(b[9]);
+          temp2 = A4->multiply(b[11]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A6->multiply(b[13]);
+          temp2=temp1->add(temp3);
+//        delete temp1;                                           // Do not delete temp1 since b[13]=1 and the same buffer is returned!
+	  delete temp3;
+          temp3 = A6->multiply(temp2);
+          delete temp2;
+	  temp1 = ident->multiply(b[1]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A2->multiply(b[3]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A4->multiply(b[5]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A6->multiply(b[7]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          U = this->multiply(temp3);
+	  delete temp3;
+	  U=U->multiply_two(std::pow(2.0,-n_squ));
+//        U = mx1*(A6*(b[13]*A6+b[11]*A4+b[9]*A2)+b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+          temp1 = A2->multiply(b[8]);
+          temp2 = A4->multiply(b[10]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A6->multiply(b[12]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp3 = A6->multiply(temp2);
+	  delete temp2;
+	  temp1 = ident->multiply(b[0]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A2->multiply(b[2]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A4->multiply(b[4]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A6->multiply(b[6]);
+          V=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+//        V = A6*(b[12]*A6+b[10]*A4+b[8]*A2)+b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+        }
+	delete A8;
+      }
+     delete A6;
+    }
+    delete A4;
+  }
+  delete A2;
+//std::cerr << "At Point 03\n";
+//std::cerr << "This is U \n";
+//U->print(std::cerr,MxPrDefs);
+//std::cerr << "This is V \n";
+//V->print(std::cerr,MxPrDefs);
+  _matrix* P = U->add(V);
+  _matrix* Q = V->subtract(U);
+  n_matrix* P2 = (n_matrix *) P;
+  n_matrix* Q2 = (n_matrix *) Q;
+//std::cerr << "This is P \n";
+//P->print(std::cerr,MxPrDefs);
+//std::cerr << "This is Q \n";
+//Q->print(std::cerr,MxPrDefs);
+#ifdef _USING_SUNPERFLIB_
+  int N    = rows_;
+  int NHRS = rows_;
+  int LDA  = rows_;
+  int *ipiv = new int[rows_];
+  int LDB   = rows_;
+  int info  = -5555;
+  zgesv(N, NHRS, (doublecomplex *) Q2->data, LDA, ipiv, (doublecomplex *) P2->data, LDB, &info);
+#endif
+#ifdef _USING_LAPACK_
+#if defined(__LP64__) /* this is needed on MacOS CLAPACK to make types match*/
+  int N    = rows_;
+  int NHRS = rows_;
+  int LDA  = rows_;
+  int *ipiv = new int[rows_];
+  int LDB   = rows_;
+  int info  = -5555;
+#else
+  long int N    = rows_;
+  long int NHRS = rows_;
+  long int LDA  = rows_;
+  long int *ipiv = new long int[rows_];
+  long int LDB   = rows_;
+  long int info  = -5555;
+#endif
+//std::cerr << "At Point 04\n";
+  zgesv_(&N, &NHRS, (__CLPK_doublecomplex *) Q2->data, &LDA, ipiv, (__CLPK_doublecomplex *) P2->data, &LDB, &info);
+#endif
+  if(info == 0)
+  { // everything looks good
+  }
+  else
+  { HMxfatal(28);			//    Unable to diagonalize
+  }
+//std::cerr << "At Point 05\n";
+  _matrix* R = P2;
+  delete [] ipiv;
+  delete ident;
+//std::cerr << "At Point 06\n";
+  for(k1=0;k1<n_squ;++k1)
+  { temp1 = R->multiply(R);
+    delete R;
+    R=temp1;
+  }
+  delete U;
+  delete V;
+  delete Q2;
+//std::cerr << "At Point 07\n";
+  return(R);
+
+#else          // BLAS NOT available
+
+// Without BLAS, we just use the standard mxexp() function here
+// This will be much slower. But Pade needs a linear equation solver.
+
+  _matrix* dmx;                                 // For eigenvalues
+  _matrix* emx;                                 // For eigenvectors
+  diag(dmx, emx);                               // Diagonalize mx to dmx & emx 
+  complex *d00 = ((d_matrix*)dmx)->data;        // Start of dmx: d00 = <0|dmx|0>
+  complex *dii, *dend = d00 + cols_;            // Indexing on dmx diagonal
+  for(dii=d00; dii<dend; dii++)                 // Effective loop over i
+    (*dii) = exp(*dii);                         // Set <i|dmx|i> to exp(<i|dmx|i>
+//  _matrix* pdt1  = dmx->multiply(emx);                // Perform exp(D)*E
+//  _matrix* emxi  = emx->inv();                        // Get inverse of emx
+//  _matrix* expmx = emxi->multiply(pdt1);      // Perform inv(E)*exp(D)*E
+
+  _matrix* emxi  = emx->inv();                  // Get inverse of emx
+  _matrix* pdt1  = dmx->multiply(emxi);
+  _matrix* expmx = emx->multiply(pdt1);
+  delete dmx;                                   // Clean up the dmx matrix
+  delete emx;                                   // Clean up the emx matrix
+  delete pdt1;                                  // Clean up the pdt1 matrix
+  delete emxi;                                  // Clean up the emxi matrix
+  return expmx;
+#endif
+}
+
+
 _matrix* h_matrix::mxexp()
   {
   _matrix* dmx;                                 // For eigenvalues
diff --git a/src/Matrix/h_matrix.h b/src/Matrix/h_matrix.h
index b37aacf..365500e 100644
--- a/src/Matrix/h_matrix.h
+++ b/src/Matrix/h_matrix.h
@@ -290,6 +290,7 @@ virtual _matrix* conjugate();
 virtual _matrix* transpose();
 virtual _matrix* adjoint();
 virtual _matrix* mxexp();
+virtual _matrix* mxpade();
 virtual complex  trace();
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/i_matrix.cc b/src/Matrix/i_matrix.cc
index a756bb3..d0a5bab 100644
--- a/src/Matrix/i_matrix.cc
+++ b/src/Matrix/i_matrix.cc
@@ -285,7 +285,9 @@ hermitian_type i_matrix::test_hermitian(double d) const
 
 matrix_type i_matrix::stored_type( ) const { return i_matrix_type; }
 matrix_type i_matrix::test_type(const matrix_type m, const double d) const	
-                                           { return m; double dtmp; dtmp=d; }
+                                           { return m;}
+// 10/11/2024 changed by MAER to eliminate compiler complaints
+//                                           { return m; double dtmp; dtmp=d; }
 std::string i_matrix::mxtype()        const { return std::string("Identity"); }
 std::string i_matrix::mxtype(bool pf) const { return std::string("Identity"); }
 
@@ -500,7 +502,7 @@ _matrix* i_matrix::divide(_matrix* mx)
     IMxfatal(3, "divide");			//   Fail in divide function
     }
   else
-    switch(mx->stored_type())
+  { switch(mx->stored_type())
       {
       case i_matrix_type: return this; break;   // Divide imx into imx
       case d_matrix_type:                       // Divide imx by dmx
@@ -513,6 +515,7 @@ _matrix* i_matrix::divide(_matrix* mx)
         return mx->inv();			// Return inv(mx)
         break;
       }   
+  }
     IMxerror(6, " mx1 / mx2", 1);		//   Matrix divide problems
     IMxerror(3, " divide", 1);			//   Matrix divide problems
     IMxfatal(25);				//   Not fully implemented
@@ -745,6 +748,7 @@ _matrix* i_matrix::adjoint()   { return this; }		    // imx self adjoint
 _matrix* i_matrix::negate()    { return new d_matrix(rows_,cols_,-complex1); } 
 _matrix* i_matrix::IM()        { return new d_matrix(rows_,cols_, complex0); }
 _matrix* i_matrix::mxexp()     { return new d_matrix(rows_,cols_, exp(1.0)); }
+_matrix* i_matrix::mxpade()    { return new d_matrix(rows_,cols_, exp(1.0)); }
 complex  i_matrix::trace()     { return complex(rows_); }
 
 /*     Function    Output                       Description
@@ -1030,9 +1034,11 @@ void i_matrix::print(std::ostream& ostr, const MxPrint& PFlgs) const
   for(i=0; i<rows_; i++ )		// Loop rows
     {
     ostr << sp;
-    for(j=0; j<i; j++)       ostr << "0 ";
-                             ostr << "1 ";
-    for(j=i+1; j<cols_; j++) ostr << "0 ";
+    for(j=0; j<i; j++)  
+      ostr << "0 ";
+    ostr << "1 ";
+    for(j=i+1; j<cols_; j++)
+      ostr << "0 ";
     ostr << "\n";
     }
   }
diff --git a/src/Matrix/i_matrix.h b/src/Matrix/i_matrix.h
index f7a71c0..1dde843 100644
--- a/src/Matrix/i_matrix.h
+++ b/src/Matrix/i_matrix.h
@@ -296,6 +296,7 @@ virtual        _matrix* conjugate();		// Return is this
 virtual inline _matrix* transpose();		// Return is this
 virtual inline _matrix* adjoint();		// Return is this
 virtual inline _matrix* mxexp();		// Return is this
+virtual inline _matrix* mxpade();		// Return is this
 virtual inline complex  trace();		// Return is size
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/matrix.cc b/src/Matrix/matrix.cc
index f1df37b..26ec2df 100644
--- a/src/Matrix/matrix.cc
+++ b/src/Matrix/matrix.cc
@@ -891,7 +891,8 @@ matrix& matrix::operator /= (double d)	// Implements this /= d
           conj	           mx1 = mx* 	  conjugate <i|mx1|j>=<i|mx*|j>
         transpose 	   mx1 = mxt	  transpose <i|mx1|j>=<j|mx|i>
         adjoint	           mx1 = mxt*	  adjoint   <i|mx1|j>=<j|mx*|i>
-	 exp               mx1 = exp(mx)  exponentiation 
+	 exp               mx1 = exp(mx)  exponentiation by diagonalization
+	 expm              mx1 = expm(mx) exponentiation by Pade approximation
 	 trace		     z = Tr{mx}	  trace     z = sum <i|mx|i>         */
 
 //matrix operator- (const matrix& mx) { return matrix(mx.m->negate());    }
@@ -909,6 +910,7 @@ matrix  matrix::conj()       const  { return matrix(m->conjugate()); }
 matrix  matrix::transpose()  const  { return matrix(m->transpose()); }
 matrix  matrix::adjoint()    const  { return matrix(m->adjoint());   }
 matrix  matrix::exp()        const  { return matrix(m->mxexp());     }
+matrix  matrix::expm()       const  { return matrix(m->mxpade());     }
 complex matrix::trace()      const  { return        m->trace();      }
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/matrix.h b/src/Matrix/matrix.h
index 6e7a894..f1ccfe4 100644
--- a/src/Matrix/matrix.h
+++ b/src/Matrix/matrix.h
@@ -366,7 +366,8 @@ MSVCDLL matrix& operator /= (double d);
           conj                mx1 = mx*         conjugate   <i|mx1|j>=<i|mx*|j>
         transpose             mx1 = mxt         transpose   <i|mx1|j>=<j|mx|i>
         adjoint               mx1 = mxt*        adjoint     <i|mx1|j>=<j|mx*|i>
-          exp                 mx1 = exp(mx)     exponential <i|mx1|j>=<j|mx*|i>
+          exp                 mx1 = exp(mx)     exponential by diagonalization
+          expm                mx1 = expm(mx)    exponential by Pade approximation
          trace                  z = Tr{mx}      trace       z = sum <i|mx|i> */
 
 MSVCDLL friend matrix  Re(const matrix& mx);
@@ -385,6 +386,7 @@ MSVCDLL matrix  conj()       const;
 MSVCDLL matrix  transpose()  const; 
 MSVCDLL matrix  adjoint()    const;
 MSVCDLL matrix  exp()        const;
+MSVCDLL matrix  expm()       const;
 MSVCDLL complex trace()      const;    
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/n_matrix.cc b/src/Matrix/n_matrix.cc
index 1ec4ffa..b978aee 100644
--- a/src/Matrix/n_matrix.cc
+++ b/src/Matrix/n_matrix.cc
@@ -84,6 +84,8 @@ extern "C"
   extern void zgetrf_(const int*, const int*, __CLPK_doublecomplex *, const int*, int*, int*);
   extern void zhetrf_(const char*, const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
                       int*);
+  extern void zgesv_(const int*, const int*, __CLPK_doublecomplex *, const int*, 
+                       int*, __CLPK_doublecomplex*, const int*, int*);
 }
 #endif
 
@@ -1368,7 +1370,350 @@ _matrix* n_matrix::adjoint()
   return amx;
   }
 
-/*  This curretly uses an eigensystem method for generating the exponential.
+
+/*  Matrix exponential using the Pade method. This is a lot faster than the
+    standard exp() function. Uses the same algorithm as MATLAB expm() and is
+    roughly as fast as the MATLAB implementation. 
+
+    Higham, N. J., The Scaling and Squaring Method for the Matrix Exponential Revisited,
+    SIAM J. Matrix Anal. Appl., 26(4) (2005), pp. 1179-1193.
+    Al-Mohy, A. H. and N. J. Higham, A new scaling and squaring algorithm for the matrix
+    exponential, SIAM J. Matrix Anal. Appl., 31(3) (2009), pp. 970-989.
+
+    This seems to be the fastest method currently.
+
+*/
+
+
+_matrix* n_matrix::mxpade()
+
+{ 
+#ifdef _USING_BLAS_
+
+// Pade needs a linear equation solver so it will only work if the BLAS libraries 
+// are installed. Otherwise we will just use the standard mxexp() which uses diagonalization.
+
+  double L1, n_squ, temp, maxnorm;
+  int k1;
+
+  if(rows_ != cols_)
+  { NMxerror(14,1);			//    Bad rectangular array use
+    NMxfatal(28);			//    Unable to diagonalize
+  }
+//std::cerr << "At Point 01 n_matrix.cc\n";
+  L1=0;
+  for(k1=0;k1<size;++k1)
+  { temp = norm(data[k1]);
+    if(temp > L1)
+      L1=temp;
+  }
+  _matrix *U;
+  _matrix *V;
+  _matrix *temp1;
+  _matrix *temp2;
+  _matrix *temp3;
+  n_squ=0;
+  _matrix* ident = new i_matrix(rows_,rows_);
+  _matrix* A2=this->multiply(this);
+//std::cerr << "At Point 02\n";
+  if(L1 < 1.495585217958292e-2)
+  { const double b[]={120.0, 60.0, 12.0, 1.0};
+//  std::cerr << "At Point 02 - 01\n";
+    temp1 = ident->multiply(b[1]);
+    temp2 = A2->multiply(b[3]);
+    temp3=temp1->add(temp2);
+    delete temp1;
+//  delete temp2;							// Do not delete temp12since b[3]=1 and the same buffer is returned
+    U = this->multiply(temp3);
+    delete temp3;
+//  U = this*(b[3]*A2+b[1]*ident);
+    temp1 = ident->multiply(b[0]);
+    temp2 = A2->multiply(b[2]);
+    V = temp1->add(temp2);
+    delete temp1;
+    delete temp2;
+//  V = b[2]*A2+b[0]*ident;
+  }
+  else
+  { _matrix *A4=A2->multiply(A2);
+    if(L1 < 2.539398330063230e-1)
+    { const double b[]={30240., 15120., 3360., 420., 30., 1.};
+//    std::cerr << "At Point 02 - 02\n";
+      temp1 = ident->multiply(b[1]);
+      temp2 = A2->multiply(b[3]);
+      temp3=temp1->add(temp2);
+      delete temp1;
+      delete temp2;
+      temp1 = A4->multiply(b[5]);
+      temp2=temp1->add(temp3);
+//    delete temp1;							// Do not delete temp1 since b[5]=1 and the same buffer is returned
+      delete temp3;
+      U = this->multiply(temp2);
+      delete temp2;
+//    U = this*(b[5]*A4+b[3]*A2+b[1]*ident);
+      temp1 = ident->multiply(b[0]);
+      temp2 = A2->multiply(b[2]);
+      temp3=temp1->add(temp2);
+      delete temp1;
+      delete temp2;
+      temp1 = A4->multiply(b[4]);
+      V=temp1->add(temp3);
+      delete temp1;
+      delete temp3;
+//    V = b[4]*A4+b[2]*A2+b[0]*ident;
+    }
+    else
+    { _matrix* A6=A4->multiply(A2);
+      if(L1 < 9.504178996162932e-1)
+      { const double b[]={17297280., 8648640., 1995840., 277200., 25200., 1512., 56., 1.};
+//      std::cerr << "At Point 02 - 03\n";
+        temp1 = ident->multiply(b[1]);
+        temp2 = A2->multiply(b[3]);
+        temp3=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+        temp1 = A4->multiply(b[5]);
+        temp2=temp1->add(temp3);
+	delete temp1;
+	delete temp3;
+        temp1 = A6->multiply(b[7]);
+        temp3=temp1->add(temp2);
+//      delete temp1;							// Do not delete temp1 since b[7]=1 and the same buffer is returned
+	delete temp2;
+        U = this->multiply(temp3);
+	delete temp3;
+//      U = this*(b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+        temp1 = ident->multiply(b[0]);
+        temp2 = A2->multiply(b[2]);
+        temp3=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+        temp1 = A4->multiply(b[4]);
+        temp2=temp1->add(temp3);
+	delete temp1;
+	delete temp3;
+        temp1 = A6->multiply(b[6]);
+        V=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+//      V = b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+      }
+      else
+      { _matrix* A8=A6->multiply(A2);
+        if(L1 < 2.097847961257068e0)
+        { const double b[]={17643225600., 8821612800., 2075673600., 302702400., 30270240.,
+	                    2162160., 110880., 3960., 90., 1.};
+//        std::cerr << "At Point 02 - 04\n";
+          temp1 = ident->multiply(b[1]);
+          temp2 = A2->multiply(b[3]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A4->multiply(b[5]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp1 = A6->multiply(b[7]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A8->multiply(b[9]);
+          temp2=temp1->add(temp3);
+//        delete temp1;								// Do not delete temp1 since b[9]=1 and the same buffer is returned
+	  delete temp3;
+          U = this->multiply(temp2);
+	  delete temp2;
+//        U = this*(b[9]*A8+b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+          temp1 = ident->multiply(b[0]);
+          temp2 = A2->multiply(b[2]);
+          temp3 = temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A4->multiply(b[4]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp1 = A6->multiply(b[6]);
+          temp3 = temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A8->multiply(b[8]);
+          V=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+//        V = b[8]*A8+b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+	}
+        else
+        { maxnorm = 5.371920351148152e0;
+          n_squ = std::max(0, int(std::ceil(std::log(L1/maxnorm))));
+//        std::cerr << "n_squ = " << n_squ << "\n";
+          const double b[]={64764752532480000., 32382376266240000., 7771770303897600.,
+                            1187353796428800., 129060195264000., 10559470521600.,
+                            670442572800., 33522128640., 1323241920., 40840800., 960960.,
+                            16380., 182., 1.};
+//        std::cerr << "At Point 02 - 05\n";
+	  A2=A2->multiply_two(std::pow(2.0,-n_squ*2));
+	  A4=A4->multiply_two(std::pow(2.0,-n_squ*4));
+	  A6=A6->multiply_two(std::pow(2.0,-n_squ*6));
+          temp1 = A2->multiply(b[9]);
+          temp2 = A4->multiply(b[11]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A6->multiply(b[13]);
+          temp2=temp1->add(temp3);
+//        delete temp1;                                           // Do not delete temp1 since b[13]=1 and the same buffer is returned!
+	  delete temp3;
+          temp3 = A6->multiply(temp2);
+          delete temp2;
+	  temp1 = ident->multiply(b[1]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A2->multiply(b[3]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A4->multiply(b[5]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A6->multiply(b[7]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          U = this->multiply(temp3);
+	  delete temp3;
+	  U=U->multiply_two(std::pow(2.0,-n_squ));
+//        U = mx1*(A6*(b[13]*A6+b[11]*A4+b[9]*A2)+b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+          temp1 = A2->multiply(b[8]);
+          temp2 = A4->multiply(b[10]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A6->multiply(b[12]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp3 = A6->multiply(temp2);
+	  delete temp2;
+	  temp1 = ident->multiply(b[0]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A2->multiply(b[2]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A4->multiply(b[4]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A6->multiply(b[6]);
+          V=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+//        V = A6*(b[12]*A6+b[10]*A4+b[8]*A2)+b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+        }
+	delete A8;
+      }
+     delete A6;
+    }
+    delete A4;
+  }
+  delete A2;
+//std::cerr << "At Point 03\n";
+//std::cerr << "This is U \n";
+//U->print(std::cerr,MxPrDefs);
+//std::cerr << "This is V \n";
+//V->print(std::cerr,MxPrDefs);
+  _matrix* P = U->add(V);
+  _matrix* Q = V->subtract(U);
+  n_matrix* P2 = (n_matrix *) P;
+  n_matrix* Q2 = (n_matrix *) Q;
+//std::cerr << "This is P \n";
+//P->print(std::cerr,MxPrDefs);
+//std::cerr << "This is Q \n";
+//Q->print(std::cerr,MxPrDefs);
+#ifdef _USING_SUNPERFLIB_
+  int N    = rows_;
+  int NHRS = rows_;
+  int LDA  = rows_;
+  int *ipiv = new int[rows_];
+  int LDB   = rows_;
+  int info  = -5555;
+  zgesv(N, NHRS, (doublecomplex *) Q2->data, LDA, ipiv, (doublecomplex *) P2->data, LDB, &info);
+#endif
+#ifdef _USING_LAPACK_
+#if defined(__LP64__) /* this is needed on MacOS CLAPACK to make types match*/
+  int N    = rows_;
+  int NHRS = rows_;
+  int LDA  = rows_;
+  int *ipiv = new int[rows_];
+  int LDB   = rows_;
+  int info  = -5555;
+#else
+  long int N    = rows_;
+  long int NHRS = rows_;
+  long int LDA  = rows_;
+  long int *ipiv = new long int[rows_];
+  long int LDB   = rows_;
+  long int info  = -5555;
+#endif
+//std::cerr << "At Point 04\n";
+  zgesv_(&N, &NHRS, (__CLPK_doublecomplex *) Q2->data, &LDA, ipiv, (__CLPK_doublecomplex *) P2->data, &LDB, &info);
+#endif
+  if(info == 0)
+  { // everything looks good
+  }
+  else
+  { NMxfatal(28);			//    Unable to diagonalize
+  }
+//std::cerr << "At Point 05\n";
+  _matrix* R = P2;
+  delete [] ipiv;
+  delete ident;
+//std::cerr << "At Point 06\n";
+  for(k1=0;k1<n_squ;++k1)
+  { temp1 = R->multiply(R);
+    delete R;
+    R=temp1;
+  }
+  delete U;
+  delete V;
+  delete Q2;
+//std::cerr << "At Point 07\n";
+  return(R);
+
+#else          // BLAS NOT available
+
+// Without BLAS, we just use the standard mxexp() function here
+// This will be much slower. But Pade needs a linear equation solver.
+  
+  _matrix* dmx;					// For eigenvalues
+  _matrix* emx;					// For eigenvectors
+  diag(dmx, emx);				// Diagonalize mx to dmx & emx 
+  complex *d00 = ((d_matrix*)dmx)->data;	// Start of dmx: d00 = <0|dmx|0>
+  complex *dii, *dend = d00 + cols_;		// Indexing on dmx diagonal
+  for(dii=d00; dii<dend; dii++)			// Effective loop over i
+    (*dii) = exp(*dii);				// Set <i|dmx|i> to exp(<i|dmx|i>
+//  _matrix* pdt1  = dmx->multiply(emx);		// Perform exp(D)*E
+//  _matrix* emxi  = emx->inv();			// Get inverse of emx
+//  _matrix* expmx = emxi->multiply(pdt1);	// Perform inv(E)*exp(D)*E
+
+  _matrix* emxi  = emx->inv();			// Get inverse of emx
+  _matrix* pdt1  = dmx->multiply(emxi);
+  _matrix* expmx = emx->multiply(pdt1);
+  delete dmx;                                   // Clean up the dmx matrix
+  delete emx;                                   // Clean up the emx matrix
+  delete pdt1;					// Clean up the pdt1 matrix
+  delete emxi;					// Clean up the emxi matrix
+  return expmx;
+#endif
+}
+
+
+/*  This currently uses an eigensystem method for generating the exponential.
 
                    exp(A) = exp[E*D*inv(E)] = E*exp(D)*inv(E)
 
@@ -2311,14 +2656,14 @@ std::vector<std::string> n_matrix::printStrings(const MxPrint& PFlgs) const
     if(is_real())           ptype = 1;          // Real elements output
     else if(is_imaginary()) ptype = 2;          // Imag elements output
     }
-  int    elen;                                  // A single element length
-  switch(ptype)                                 // Get the element length
-    {                                           // from class complex. This 
-    default:                                    // depends on the output
-    case 0: elen = complex::zlength(); break;   // format
-    case 1:
-    case 2: elen = complex::dlength(); break;
-    }
+//int    elen;                                  // A single element length
+//switch(ptype)                                 // Get the element length
+//  {                                           // from class complex. This 
+//  default:                                    // depends on the output
+//  case 0: elen = complex::zlength(); break;   // format
+//  case 1:
+//  case 2: elen = complex::dlength(); break;
+//  }
   std::string pline;
   std::string efmt = complex::dformat();        // Real/Imag element format
   int i,j,pos;
diff --git a/src/Matrix/n_matrix.h b/src/Matrix/n_matrix.h
index 4fe44b4..1955f95 100644
--- a/src/Matrix/n_matrix.h
+++ b/src/Matrix/n_matrix.h
@@ -366,6 +366,7 @@ virtual _matrix* conjugate();
 virtual _matrix* transpose();
 virtual _matrix* adjoint();
 virtual _matrix* mxexp();
+virtual _matrix* mxpade();
 virtual complex  trace();
 
 /*     Function    Output                       Description

From 90593ef938b521b22e8011a70802910db7f0290f Mon Sep 17 00:00:00 2001
From: Matthias Ernst <maer@ethz.ch>
Date: Wed, 13 Nov 2024 07:42:36 +0100
Subject: [PATCH 4/5] Added expm (Pade based exponentiation) to class gen_op,
 floq_op, and floq2_op. No standard functionality has been changed. These are
 just additional functions that can be used as drop-in replacements for the
 exp function if desired. They are only different from exp if a BLAS library
 is used.

---
 src/Floquet/Floq2Op.cc | 12 +++++++
 src/Floquet/Floq2Op.h  |  8 ++++-
 src/Floquet/FloqOp.cc  | 26 +++++++++++++++
 src/Floquet/FloqOp.h   | 12 +++++++
 src/HSLib/GenOp.cc     | 76 ++++++++++++++++++++++++++++++++++++------
 src/HSLib/GenOp.h      | 18 ++++++++++
 6 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/src/Floquet/Floq2Op.cc b/src/Floquet/Floq2Op.cc
index eb0c9fd..5500eb9 100644
--- a/src/Floquet/Floq2Op.cc
+++ b/src/Floquet/Floq2Op.cc
@@ -803,6 +803,18 @@ floq2_op exp(floq2_op &Op1)
   floq2_op EXpOp1(Op1.N1, Op1.N2, Op1.hs, Op1._omega1, Op1._omega2);
    EXpOp1.gen_op::operator= (exp((gen_op&) Op1));  // calculate exp(Op1)
   return EXpOp1;
+  }
+
+
+	// Input		Op1   : Floquet operator
+        // Return		Op    : exponential of Op1 using Pade
+	//				Op = exp(Op1)
+
+floq2_op expm(floq2_op &Op1)
+  {  
+  floq2_op EXpOp1(Op1.N1, Op1.N2, Op1.hs, Op1._omega1, Op1._omega2);
+   EXpOp1.gen_op::operator= (Op1.gen_op::expm());  // calculate exp(Op1)
+  return EXpOp1;
   }
 
 /*
diff --git a/src/Floquet/Floq2Op.h b/src/Floquet/Floq2Op.h
index 5e6e028..3cf5ad3 100644
--- a/src/Floquet/Floq2Op.h
+++ b/src/Floquet/Floq2Op.h
@@ -243,13 +243,19 @@ MSVCDLL        void     operator /= (      double   d);
         // Output          gen_op    : Operator containing trace(s) over    
         //                             Photon space
 
-MSVCDLL friend floq2_op exp (floq2_op &Op1);
+MSVCDLL friend floq2_op exp(floq2_op &Op1);
 	
         // Input		Op1  : Floquet operator
         // Return		Op   : exponential of Op1
 	//			       Op = exp(Op1)
         // Note			     : Computed in EBR of Op1
 
+MSVCDLL friend floq2_op expm(floq2_op &Op1);
+	
+        // Input		Op1  : Floquet operator
+        // Return		Op   : exponential of Op1 using Pade
+	//			       Op = exp(Op1)
+
 /*  friend floq2_op prop (floq2_op &FLOQHAM , double &time);
  
 	// Input		Op1   : Floquet Hamiltonian
diff --git a/src/Floquet/FloqOp.cc b/src/Floquet/FloqOp.cc
index 032ca2a..6972f72 100644
--- a/src/Floquet/FloqOp.cc
+++ b/src/Floquet/FloqOp.cc
@@ -653,6 +653,32 @@ floq_op floq_op::exp()
   return ExpFOp;
   }
 
+        // Input		FOp	: Floquet operator
+        // Return		ExpFOp	: Exponential of FOp using Pade
+	//				   ExpFOp = exp(FOp)
+
+floq_op expm(const floq_op& FOp)
+  {
+  floq_op ExpFOp(FOp.N, FOp.hs, FOp.Om);	// Allocate return operator
+  ExpFOp.gen_op::operator= (FOp.gen_op::expm());	// Set exponential array
+  return ExpFOp;				// Return exponentiated FOp
+  }
+
+
+floq_op floq_op::expm()
+
+        // Input                FlOp    : Floquet operator (*this)
+        // Return               ExpFlOp : Exponential of Op1 using Pade
+        //                                   Op = exp(Op1)
+	// sosi				: Be nice to set this up as in
+	//				  class gen_op.....
+
+  {
+  floq_op ExpFOp(N, hs, Om);	// Allocate return operator
+  ExpFOp=*this;
+  ExpFOp.gen_op::operator= (ExpFOp.gen_op::expm());	// Set exponential array
+  return ExpFOp;
+  }
 
 floq_op prop(floq_op& FH, double time)
 
diff --git a/src/Floquet/FloqOp.h b/src/Floquet/FloqOp.h
index 8979443..9d390e1 100644
--- a/src/Floquet/FloqOp.h
+++ b/src/Floquet/FloqOp.h
@@ -268,6 +268,18 @@ MSVCDLL floq_op exp();
         // Return               ExpFlOp : Exponential of Op1
         //                                   Op = exp(Op1)
 
+MSVCDLL friend floq_op expm(const floq_op& Op1);
+	
+        // Input		Op1  : Floquet operator
+        // Return		Op   : exponential of Op1 using Pade
+	//			       Op = exp(Op1)
+
+MSVCDLL floq_op expm();
+
+        // Input                FlOp    : Floquet operator (*this)
+        // Return               ExpFlOp : Exponential of Op1 using Pade
+        //                                   Op = exp(Op1)
+
 MSVCDLL friend floq_op prop(floq_op& FLOQHAM, double time);
  
 	// Input		Op1   : Floquet Hamiltonian
diff --git a/src/HSLib/GenOp.cc b/src/HSLib/GenOp.cc
index b895df6..36512e9 100644
--- a/src/HSLib/GenOp.cc
+++ b/src/HSLib/GenOp.cc
@@ -1165,13 +1165,14 @@ complex gen_op::proj(const gen_op &Op2, int normf) const
   }
 
 
+	// Input		Op   : General operator (this)
+        // Return		int  : Op Liouville space dimension
+
 int gen_op::dim()    const { return (!WBR)?0:WBR->RepMx.cols(); }
 int gen_op::HS()     const { return (!WBR)?0:WBR->RepMx.cols(); }
 int gen_op::LS()     const { return (!WBR)?0:WBR->RepBs.dim_LS(); }
 int gen_op::dim_LS() const { return (!WBR)?0:WBR->RepBs.dim_LS(); }
 
-	// Input		Op   : General operator (this)
-        // Return		int  : Op Liouville space dimension
 
 
 	// Input		Op1   : General operator (this)
@@ -1228,6 +1229,52 @@ gen_op gen_op::exp(const complex& t, double cutoff) const
   return ExpOp;
   }
  
+	// Input		Op   : General operator (this)
+        // Return		int  : Op Liouville space dimension
+
+
+	// Input		Op1   : General operator (this)
+        // Return		Op    : exponential of Op1
+	//				Op = exp(Op1)
+        // Note			      : Computed in same base as Op1
+        // Note			      : Returns I matrix for Null Op
+
+gen_op gen_op::expm() const
+  {
+  int hs = dim();				// Operator Hilbert space
+  if(!WBR)					// If we have no reps then
+    {						// return an identity matrix
+    if(!hs) GenOpfatality(3, "exp"); 		// or Op exponential error
+    return gen_op(matrix(hs,hs,i_matrix_type)); // if there is no dimension
+    }
+  gen_op ExpOp(*this); 				// Copy Op in diagonal form
+  matrix mx1 = this->get_mx();
+  ExpOp.put_mx(mx1.expm());
+  return ExpOp;
+  }     
+
+
+
+        // Input                Op    : Operator (this)
+        //                      t     : Exponential factor
+        //                      cutoff: Exponential factor roundoff
+        // Return               ExpOp : Exponential of Op
+        //                              ExpOp = exp(t*Op)
+        // Note                       : Exponential output in same base as Op
+        // Note                       : Value of t is considered 0 if
+        //                              it's magnituded is less than cutoff
+
+gen_op gen_op::expm(const complex& t, double cutoff) const
+  {
+  int hs = dim();				// Operator Hilbert space
+  if(!WBR && !hs) GenOpfatality(3,"exp"); 	// Op exponential error
+  if(!WBR || norm(t) < fabs(cutoff))
+    return gen_op(matrix(hs, hs, i_matrix_type));
+  gen_op ExpOp(*this); 
+  matrix mx1 = this->get_mx()*t;
+  ExpOp.put_mx(mx1.expm());
+  return ExpOp;
+  }
  
 
         // Input                Op      : Operator (this)
@@ -1682,9 +1729,14 @@ void gen_op::status(int pf) const
       case i_matrix_type:
       case d_matrix_type:
       case n_matrix_type:
-        if((REP->RepMx).test_hermitian()) std::cout << ", Hermitian"; 
-        else                              std::cout << ", Non-Hermitian"; break;
-      default:                            std::cout << ", Non-Hermitian"; break;
+        if((REP->RepMx).test_hermitian())
+	  std::cout << ", Hermitian"; 
+        else                              
+	  std::cout << ", Non-Hermitian";
+	break;
+      default:                            
+	std::cout << ", Non-Hermitian";
+	break;
       }
 
     std::cout << "\n\t     - basis  = " << (REP->RepBs).refs() << " refs";
@@ -1702,9 +1754,14 @@ void gen_op::status(int pf) const
       case i_matrix_type:
       case d_matrix_type:
       case n_matrix_type:
-        if((REP->RepMx).test_hermitian()) std::cout << ", Hermitian";
-        else                              std::cout << ", Non-Hermitian"; break;
-      default:                            std::cout << ", Non-Hermitian"; break;
+        if((REP->RepMx).test_hermitian())
+	  std::cout << ", Hermitian";
+        else                             
+	  std::cout << ", Non-Hermitian";
+	break;
+      default:                           
+	std::cout << ", Non-Hermitian";
+	break;
       }
     REP++;
     item++;
@@ -1904,8 +1961,7 @@ void gen_op::SetLimits(int limit) const
   std::cout << "\n\tLimiting No. Of ";
   if(OpName.length()) std::cout << OpName << " ";
   std::cout << "Operator Representations";
-  std::cout << "\n\tCurrent Representations: " 
-            << size();
+  std::cout << "\n\tCurrent Representations: " << size();
   std::cout << ", Maximum Allowed: " << limit; 
 # endif
   gen_op* OpTmp = const_cast<gen_op*>(this);	// Cast away const (mutable!)
diff --git a/src/HSLib/GenOp.h b/src/HSLib/GenOp.h
index 7dbed89..22abb17 100644
--- a/src/HSLib/GenOp.h
+++ b/src/HSLib/GenOp.h
@@ -429,6 +429,24 @@ MSVCDLL gen_op exp(const complex& t, double cutoff=1.e-12) const;
         // Note                       : Value of t is considered 0 if
         //                              it's magnituded is less than cutoff
 
+MSVCDLL gen_op expm() const;
+
+	// Input		Op1  : General operator (this)
+        // Return		Op   : exponential of Op1
+	//			       Op = exp(Op1) using pade
+        // Note			     : Computed in the same base as Op1
+
+
+MSVCDLL gen_op expm(const complex& t, double cutoff=1.e-12) const;
+
+        // Input                Op    : Operator (this)
+        //                      t     : Exponential factor
+        //                      cutoff: Exponential factor roundoff
+        // Return               ExpOp : Exponential of Op
+        //                              ExpOp = exp(t*Op) using Pade
+        // Note                       : Exponential output in same base as Op
+        // Note                       : Value of t is considered 0 if
+        //                              it's magnituded is less than cutoff
 
 MSVCDLL gen_op Pow(int power) const;
 

From 93cd360e64eda66c8580b8c6534b4f41e86f91a5 Mon Sep 17 00:00:00 2001
From: Matthias Ernst <maer@ethz.ch>
Date: Mon, 6 Jan 2025 16:28:27 +0100
Subject: [PATCH 5/5] Corrected an error that would not calculate the
 properexpm of a super operator.

---
 src/LSLib/SuperOp.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/LSLib/SuperOp.cc b/src/LSLib/SuperOp.cc
index a87a0db..badc9f9 100644
--- a/src/LSLib/SuperOp.cc
+++ b/src/LSLib/SuperOp.cc
@@ -1422,7 +1422,7 @@ super_op expm(const super_op& LOp1, const complex& t)
         // Note			      : Computed in EBR of LOp1
 
   {
-  super_op LOp(matrix(LOp1.LSp, LOp1.LSp, d_matrix_type));
+  super_op LOp = LOp1;
   if(LOp.HSp) 				// Check for NULL LOp
     {
     if(t != complex0)
@@ -1436,8 +1436,6 @@ super_op expm(const super_op& LOp1, const complex& t)
       matrix mx(LOp1.LSp, LOp1.LSp, i_matrix_type);	// LOp is the identity superoperator
       LOp.mx = mx;
       }
-    LOp.Hbs = LOp1.Hbs;			// LOp Hilbert space basis same as LOp1's
-    LOp.Lbs = LOp1.Lbs;			// LOp Liouville space basis same as LOp1's
     }
   else
     {