From c694d90c91a0ad45deede302345afcc16f099b66 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 15 Jul 2013 13:49:05 +0200 Subject: [PATCH 001/118] MIC requires that mpi.h be included before stdio.h (issue #12) --- PSKOutput3D/PSKhdf5adaptor.cpp | 1 + bc/BcParticles.cpp | 1 + fields/EMfields3D.cpp | 1 + grids/Grid3DCU.cpp | 1 + iPic3D.cpp | 1 + inputoutput/Collective.cpp | 1 + inputoutput/WriteOutputParallel.cpp | 1 + inputoutput/phdf5.cpp | 1 + particles/Particles3D.cpp | 1 + particles/Particles3Dcomm.cpp | 1 + solvers/CG.cpp | 1 + solvers/GMRES.cpp | 1 + 12 files changed, 12 insertions(+) diff --git a/PSKOutput3D/PSKhdf5adaptor.cpp b/PSKOutput3D/PSKhdf5adaptor.cpp index bb220cce..33c83115 100644 --- a/PSKOutput3D/PSKhdf5adaptor.cpp +++ b/PSKOutput3D/PSKhdf5adaptor.cpp @@ -1,4 +1,5 @@ +#include #include "PSKhdf5adaptor.h" using namespace PSK; diff --git a/bc/BcParticles.cpp b/bc/BcParticles.cpp index b26dee05..ff3999a3 100644 --- a/bc/BcParticles.cpp +++ b/bc/BcParticles.cpp @@ -1,4 +1,5 @@ +#include #include "BcParticles.h" /** set the boundary VirtualTopology3D3Dcondition for particle in 2D diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 585d8dc9..bca24228 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -1,4 +1,5 @@ +#include #include "EMfields3D.h" /*! constructor */ diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index 1fd94891..c2b92797 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -1,4 +1,5 @@ +#include #include "Grid3DCU.h" /*! constructor */ diff --git a/iPic3D.cpp b/iPic3D.cpp index 7fc86b24..1803f2aa 100644 --- a/iPic3D.cpp +++ b/iPic3D.cpp @@ -1,4 +1,5 @@ +#include #include #include "iPic3D.h" diff --git a/inputoutput/Collective.cpp b/inputoutput/Collective.cpp index 1081f4a7..9f11e3de 100644 --- a/inputoutput/Collective.cpp +++ b/inputoutput/Collective.cpp @@ -1,4 +1,5 @@ +#include #include "Collective.h" /*! 
Read the input file from text file and put the data in a collective wrapper: if it's a restart read from input file basic sim data and load particles and EM field from restart file */ diff --git a/inputoutput/WriteOutputParallel.cpp b/inputoutput/WriteOutputParallel.cpp index 2f83af88..6de0d2ea 100644 --- a/inputoutput/WriteOutputParallel.cpp +++ b/inputoutput/WriteOutputParallel.cpp @@ -1,4 +1,5 @@ +#include #include "WriteOutputParallel.h" void WriteOutputParallel(Grid3DCU *grid, EMfields3D *EMf, CollectiveIO *col, VCtopology3D *vct, int cycle){ diff --git a/inputoutput/phdf5.cpp b/inputoutput/phdf5.cpp index 9ffc94f3..591f54ca 100644 --- a/inputoutput/phdf5.cpp +++ b/inputoutput/phdf5.cpp @@ -1,4 +1,5 @@ +#include #include "phdf5.h" PHDF5fileClass::PHDF5fileClass(string filestr, int nd, int *coord, MPI_Comm mpicomm){ diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index dc24f0ca..a2a7f05c 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -5,6 +5,7 @@ developers: Stefano Markidis, Giovanni Lapenta ********************************************************************************************/ +#include #include #include diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index d7fcef81..db0ec204 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -4,6 +4,7 @@ developers: Stefano Markidis, Giovanni Lapenta. 
********************************************************************************************/ +#include #include #include #include "VirtualTopology3D.h" diff --git a/solvers/CG.cpp b/solvers/CG.cpp index 7e9f41ef..8b36d461 100644 --- a/solvers/CG.cpp +++ b/solvers/CG.cpp @@ -1,4 +1,5 @@ +#include #include "CG.h" /** diff --git a/solvers/GMRES.cpp b/solvers/GMRES.cpp index c245c79a..1e3d4b1b 100644 --- a/solvers/GMRES.cpp +++ b/solvers/GMRES.cpp @@ -1,4 +1,5 @@ +#include #include "GMRES.h" void GMRES(FIELD_IMAGE FunctionImage, double *xkrylov, int xkrylovlen, double *b, int m, int max_iter, double tol, Grid * grid, VirtualTopology3D * vct, Field * field) { From c4eaee26ea1dadbb0f944cc938812d8e0e5a2515 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 15 Jul 2013 21:20:35 +0200 Subject: [PATCH 002/118] Issue #29: add errors.h for diagnostics: eprintf() and invalid_value_error() --- include/errors.h | 25 +++++++++++++++++++++++++ utility/errors.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 include/errors.h create mode 100644 utility/errors.cpp diff --git a/include/errors.h b/include/errors.h new file mode 100644 index 00000000..ca80313f --- /dev/null +++ b/include/errors.h @@ -0,0 +1,25 @@ +#ifndef ipic_errors_H +#define ipic_errors_H + +void errmsg_printf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); +void eprintf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); +void Wprintf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); + +#define errmsg_printf(args...) \ + errmsg_printf_fileLine(__func__, __FILE__, __LINE__, ## args); +#define eprintf(args...) \ + errmsg_printf_fileLine(__func__, __FILE__, __LINE__, ## args); +#define Wprintf(args...) 
\ + Wprintf_fileLine(__func__, __FILE__, __LINE__, ## args); +#define declare_invalid_value_error(t1) \ + void invalid_value_error_fileLine(const char* file, int line, const char* func, \ + const char* type, const char* expr, t1 val); +declare_invalid_value_error(double); +declare_invalid_value_error(int); +declare_invalid_value_error(const char*); +#define unsupported_value_error(val) invalid_value_error_fileLine( \ + __FILE__, __LINE__, __func__, "unsupported", #val, val); +#define invalid_value_error(val) invalid_value_error_fileLine( \ + __FILE__, __LINE__, __func__, "invalid", #val, val); + +#endif diff --git a/utility/errors.cpp b/utility/errors.cpp new file mode 100644 index 00000000..3572df99 --- /dev/null +++ b/utility/errors.cpp @@ -0,0 +1,44 @@ + +#include +#include +#include +#include "errors.h" +//#include "MPIdata.h" // for rank + +/** implementation of declarations in errors.h **/ + +void errmsg_printf_fileLine(const char *func, const char *file, int line_number, + const char *format, ...) 
+{ + FILE* fptr = stdout; + fflush(fptr); + va_list args; + va_start(args, format); + fprintf(fptr, "ERROR in function %s, file %s, line %d: \n\t", + func, file, line_number); + /* print out remainder of message */ + vfprintf(fptr, format, args); + va_end(args); + // append terminating newline so user does not have to do it + fprintf(fptr, "\n"); + fflush(fptr); + + abort(); +} + +#include +using namespace std; +#define implement_invalid_value_error(t1) \ + void invalid_value_error_fileLine(const char* file, int line, const char* func, \ + const char* type, const char* expr, t1 val) \ + { \ + std::cerr<< "ERROR in file " << file << ", line " << line \ + << ", function " << func \ + <<"\n\t" << type << " value: " << expr << " = " << val << endl; \ + abort(); \ + } + +implement_invalid_value_error(double); +implement_invalid_value_error(int); +implement_invalid_value_error(const char*); + From 37414e353b3aa458a6b81325a645f1db8faf6b09 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 15 Jul 2013 21:32:35 +0200 Subject: [PATCH 003/118] issue #30: MPIdata should be a singleton --- iPic3D.cpp | 3 +++ include/MPIdata.h | 60 ++++++++++++++------------------------------- include/PSKOutput.h | 4 +-- include/iPic3D.h | 2 +- main/iPic3Dlib.cpp | 15 +++++++++--- utility/MPIdata.cpp | 59 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 95 insertions(+), 48 deletions(-) create mode 100644 utility/MPIdata.cpp diff --git a/iPic3D.cpp b/iPic3D.cpp index 1803f2aa..5e31c535 100644 --- a/iPic3D.cpp +++ b/iPic3D.cpp @@ -2,6 +2,7 @@ #include #include #include "iPic3D.h" +#include "debug.h" using namespace iPic3D; @@ -10,6 +11,8 @@ int main(int argc, char **argv) { iPic3D::c_Solver KCode; bool b_err = false; + MPIdata::init(&argc, &argv); + //dprintf("MPI has been initialized."); KCode.Init(argc, argv); for (int i = KCode.FirstCycle(); i < KCode.LastCycle(); i++) { diff --git a/include/MPIdata.h b/include/MPIdata.h index 07efe13b..c4d535ab 100644 --- a/include/MPIdata.h +++ 
b/include/MPIdata.h @@ -11,11 +11,6 @@ email : markidis@lanl.gov, lapenta@lanl.gov #define MPIDATA_H #include -#include - -using std::cout; -using std::endl; - /** * MPI Data Structure. This class contains: * @@ -29,55 +24,38 @@ using std::endl; * (C) 2004 Los Alamos National Laboratory * @author Stefano Markidis, Giovanni Lapenta * @version 1.0 + * + * I made this class a singleton. It should only be created once, + * since MPI_Init should be called only once. -Alec */ class MPIdata { public: - /** constructor: setup MPI environment */ - MPIdata(int *, char ***); - /** destructor */ - ~MPIdata(); - /** initialize MPIdata */ - void init(int *, char ***); + static MPIdata& instance(); +private: + // disable constructor and destructor of this singleton + // by making them private. + ~MPIdata(){} + MPIdata(){} +public: + /** initialize MPI environment */ + static void init(int *, char ***); /** close MPI environment */ void finalize_mpi(); /** print MPI data structure */ void Print(void); /** MPI status during the communication */ MPI_Status status; +public: + static int get_rank(){return instance().rank;} + static int get_nprocs(){return instance().nprocs;} +private: /** rank of the process */ - int rank; + static int rank; /** number of processes */ - int nprocs; + static int nprocs; + // evidently unused... 
char *buffer; int buffer_size; }; -inline MPIdata::MPIdata(int *argc, char ***argv) { - /* Initialize the MPI API */ - MPI_Init(argc, argv); - - /* Set rank */ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - /* Set total number of processors */ - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - -} - -inline MPIdata::~MPIdata() { -} - -inline void MPIdata::finalize_mpi() { - MPI_Finalize(); -} - -inline void MPIdata::Print(void) { - cout << endl; - cout << "Number of processes = " << nprocs << endl; - cout << "-------------------------" << endl; - cout << endl; -} - -// extern MPIdata *mpi; // instantiated in iPIC3D.cpp - #endif diff --git a/include/PSKOutput.h b/include/PSKOutput.h index d1e10dc6..604387cf 100644 --- a/include/PSKOutput.h +++ b/include/PSKOutput.h @@ -335,7 +335,7 @@ template < class Toa > class myOutputAgent:public PSK::OutputAgent < Toa > { stringstream ss; stringstream cc; stringstream ii; - ss << _mpi->rank; + ss << _mpi->get_rank(); cc << cycle; const int ns = _col->getNs(); if (tag.find("last_cycle", 0) != string::npos) @@ -608,7 +608,7 @@ template < class Toa > class myOutputAgent:public PSK::OutputAgent < Toa > { void output(const string & tag, int cycle, int sample) { stringstream ss; stringstream cc; - ss << _mpi->rank; + ss << _mpi->get_rank(); cc << cycle; const int ns = _col->getNs(); diff --git a/include/iPic3D.h b/include/iPic3D.h index a1b551f5..440fdf02 100644 --- a/include/iPic3D.h +++ b/include/iPic3D.h @@ -46,7 +46,7 @@ namespace iPic3D { inline int get_myrank(); private: - MPIdata * mpi; + static MPIdata * mpi; Collective *col; VCtopology3D *vct; Grid3DCU *grid; diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 1f15e0dc..54e8495a 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -2,14 +2,20 @@ #include "iPic3D.h" using namespace iPic3D; +MPIdata* iPic3D::c_Solver::mpi=0; int c_Solver::Init(int argc, char **argv) { - // initialize MPI environment + // get MPI data + // + // c_Solver is not a singleton, so the following 
line was pulled out. + //MPIdata::init(&argc, &argv); + // + // initialized MPI environment // nprocs = number of processors // myrank = rank of tha process*/ - mpi = new MPIdata(&argc, &argv); - nprocs = mpi->nprocs; - myrank = mpi->rank; + mpi = &MPIdata::instance(); + nprocs = MPIdata::get_nprocs(); + myrank = MPIdata::get_rank(); col = new Collective(argc, argv); // Every proc loads the parameters of simulation from class Collective verbose = col->getVerbose(); @@ -353,3 +359,4 @@ void c_Solver::Finalize() { // close MPI mpi->finalize_mpi(); } + diff --git a/utility/MPIdata.cpp b/utility/MPIdata.cpp new file mode 100644 index 00000000..6baa3697 --- /dev/null +++ b/utility/MPIdata.cpp @@ -0,0 +1,59 @@ +#include +#include +#include +#include "MPIdata.h" + +using std::cout; +using std::endl; + +// code to check that init() is called before instance() +// +// no need for this to have more than file scope +int MPIdata::rank=-1; +int MPIdata::nprocs=-1; +static bool MPIdata_is_initialized=false; +bool MPIdata_assert_initialized() +{ + assert(MPIdata_is_initialized); + return true; +} + +MPIdata& MPIdata::instance() +{ + // This is executed on the first call to check that + // MPIdata has first been initialized. + static bool check = MPIdata_assert_initialized(); + static MPIdata* instance = new MPIdata; + // After the first call, this is the only line + // that is actually executed. 
+ return *instance; +} + +void MPIdata::init(int *argc, char ***argv) { + assert(!MPIdata_is_initialized); + + /* Initialize the MPI API */ + MPI_Init(argc, argv); + + /* Set rank */ + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + /* Set total number of processors */ + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + MPIdata_is_initialized = true; +} + +void MPIdata::finalize_mpi() { + MPI_Finalize(); +} + +void MPIdata::Print(void) { + cout << endl; + cout << "Number of processes = " << get_nprocs() << endl; + cout << "-------------------------" << endl; + cout << endl; +} + +// extern MPIdata *mpi; // instantiated in iPIC3D.cpp + From 984f59954cab1b85287b4cd08f224b2ca7a7cc09 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 15 Jul 2013 21:34:00 +0200 Subject: [PATCH 004/118] debug.h was broken under commit bd0fa30835c. This uses MPIdata singleton to fix it. --- include/debug.h | 2 +- utility/debug.cpp | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/debug.h b/include/debug.h index 04b69172..6532f599 100644 --- a/include/debug.h +++ b/include/debug.h @@ -4,7 +4,7 @@ #ifndef __DEBUG_H__ #define __DEBUG_H__ -#include +#include #include #include "debug.h" diff --git a/utility/debug.cpp b/utility/debug.cpp index e4271d88..306775fb 100644 --- a/utility/debug.cpp +++ b/utility/debug.cpp @@ -1,11 +1,12 @@ +#include "MPIdata.h" // for get_rank #include "debug.h" #define implement_dprintvar_fileLine(code,type) \ void dprintvar_fileLine(const char* func, const char* file, int line, \ const char* name, type val) \ { \ - dfprintf_fileLine(stderr,func,file,line, code " == %s",val,name); \ + dfprintf_fileLine(stdout,func,file,line, code " == %s",val,name); \ } implement_dprintvar_fileLine("%s", const char *); @@ -16,8 +17,10 @@ void dfprintf_fileLine(FILE * fptr, const char *func, const char *file, int line fflush(fptr); va_list args; va_start(args, format); - fprintf(fptr, "(%d) DEBUG %s(), %s:%d: ", func, file, // my_basename(file), - line_number); 
+ fprintf(fptr, "(%d) DEBUG %s(), %s:%d: ", + MPIdata::get_rank(), + func, file, // my_basename(file), + line_number); /* print out remainder of message */ vfprintf(fptr, format, args); va_end(args); From da737d93b64bf3732bd46f033f15361f3fb76ee5 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 15 Jul 2013 21:38:35 +0200 Subject: [PATCH 005/118] issue#31: consistently use stdout rather than stderr --- utility/asserts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utility/asserts.cpp b/utility/asserts.cpp index 1de2301e..c5beed50 100644 --- a/utility/asserts.cpp +++ b/utility/asserts.cpp @@ -2,7 +2,7 @@ #include "asserts.h" void assert_error(const char *file, int line, const char *func, const char *op, const char *lhs_str, const char *rhs_str, double lhs, double rhs) { - fprintf(stderr, "ERROR in file %s, line %d, function %s" "\n\tassertion failed: %s %s %s, i.e., %24.16e %s %24.16e\n", file, line, func, lhs_str, op, rhs_str, lhs, op, rhs); + fprintf(stdout, "ERROR in file %s, line %d, function %s" "\n\tassertion failed: %s %s %s, i.e., %24.16e %s %24.16e\n", file, line, func, lhs_str, op, rhs_str, lhs, op, rhs); abort(); } From aeccc32e81466330a89ebd5a1d0cfaced4258fd6 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 15 Jul 2013 21:43:48 +0200 Subject: [PATCH 006/118] issue #32: implemented USING_PARALLEL_HDF5 so that users are not forced to compile with parallel hdf5 --- include/ipicdefs.h | 7 +++++++ inputoutput/phdf5.cpp | 12 ++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 include/ipicdefs.h diff --git a/include/ipicdefs.h b/include/ipicdefs.h new file mode 100644 index 00000000..d9b7720c --- /dev/null +++ b/include/ipicdefs.h @@ -0,0 +1,7 @@ +#ifndef __IPIC_DEFS_H__ +#define __IPIC_DEFS_H__ + +// uncomment the following line to use parallel hdf5 +//#define USING_PARALLEL_HDF5 + +#endif diff --git a/inputoutput/phdf5.cpp b/inputoutput/phdf5.cpp index 591f54ca..3eb39397 100644 --- a/inputoutput/phdf5.cpp +++ 
b/inputoutput/phdf5.cpp @@ -1,6 +1,8 @@ #include #include "phdf5.h" +#include "ipicdefs.h" +#include "errors.h" PHDF5fileClass::PHDF5fileClass(string filestr, int nd, int *coord, MPI_Comm mpicomm){ @@ -66,7 +68,12 @@ void PHDF5fileClass::CreatePHDF5file(double *L, int *dglob, int *dlocl, bool bp) /* 2- Tell HDF5 that we want to use MPI-IO */ /* --------------------------------------- */ + #ifdef USING_PARALLEL_HDF5 H5Pset_fapl_mpio(acc_t, comm, MPI_INFO_NULL); + #else + eprintf("WriteMethod==Parallel in input file " + "requires setting USING_PARALLEL_HDF5 in ipicdefs.h"); + #endif /* ------------------------------------------------------- */ /* 3- Load file identifier and release the access template */ @@ -201,7 +208,12 @@ int PHDF5fileClass::WritePHDF5dataset(string grpname, string datasetname, double /* --------------------------------- */ dataset_xfer = H5Pcreate(H5P_DATASET_XFER); + #ifdef USING_PARALLEL_HDF5 H5Pset_dxpl_mpio(dataset_xfer, H5FD_MPIO_COLLECTIVE); + #else + eprintf("WriteMethod==Parallel in input file " + "requires setting USING_PARALLEL_HDF5 in ipicdefs.h"); + #endif /* ---------------------------- */ /* 9- Write data to the dataset */ From 6c90362934902521a9a9a2fcf4e47a0a8df86090 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 07:27:39 +0200 Subject: [PATCH 007/118] Restored implementation of TimeTasks (issue #17). Some communication time for accumulating moments was being reckoned as calculation time. 
--- communication/ComNodes3D.cpp | 53 +++++++-------- fields/EMfields3D.cpp | 5 +- iPic3D.cpp | 2 - include/ComNodes3D.h | 4 +- include/EMfields3D.h | 2 +- include/Particles3D.h | 2 +- main/iPic3Dlib.cpp | 23 ++++--- particles/Particles3D.cpp | 4 +- utility/TimeTasks.cpp | 124 +++++++++++++++++++++++++++++++++++ 9 files changed, 174 insertions(+), 45 deletions(-) create mode 100644 utility/TimeTasks.cpp diff --git a/communication/ComNodes3D.cpp b/communication/ComNodes3D.cpp index 575e8248..30cb5ed8 100644 --- a/communication/ComNodes3D.cpp +++ b/communication/ComNodes3D.cpp @@ -1,10 +1,11 @@ #include "ComNodes3D.h" +#include "TimeTasks.h" /** communicate ghost cells (FOR NODES) */ void communicateNode(int nx, int ny, int nz, double ***vector, VirtualTopology3D * vct) { + timeTasks.start_communicate(); - // timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -101,12 +102,12 @@ void communicateNode(int nx, int ny, int nz, double ***vector, VirtualTopology3D delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } /** communicate ghost cells (FOR NODES) */ void communicateNodeBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - // timeTasks.start_communicate(); + timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -206,12 +207,12 @@ void communicateNodeBC(int nx, int ny, int nz, double ***vector, int bcFaceXrigh delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + 
timeTasks.addto_communicate(); } /** communicate ghost cells (FOR NODES) with particles BC*/ void communicateNodeBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - // timeTasks.start_communicate(); + timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -311,14 +312,14 @@ void communicateNodeBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXri delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } /** SPECIES: communicate ghost cells */ void communicateNode(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D * vct) { - // timeTasks.start_communicate(); + timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -414,15 +415,15 @@ void communicateNode(int nx, int ny, int nz, double ****vector, int ns, VirtualT delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } // // PARTICLES /** SPECIES: communicate ghost cells */ void communicateNode_P(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D * vct) { + timeTasks.start_communicate(); - // timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -518,15 +519,15 @@ void communicateNode_P(int nx, int ny, int nz, double ****vector, int ns, Virtua delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; 
delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } // /** communicate ghost cells (FOR CENTERS) */ void communicateCenter(int nx, int ny, int nz, double ***vector, VirtualTopology3D * vct) { - // timeTasks.start_communicate(); + timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -621,12 +622,12 @@ void communicateCenter(int nx, int ny, int nz, double ***vector, VirtualTopology delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ void communicateCenterBoxStencilBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - // timeTasks.start_communicate(); + timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -653,12 +654,12 @@ void communicateCenterBoxStencilBC(int nx, int ny, int nz, double ***vector, int delete[]ghostYleftFace; delete[]ghostZrightFace; delete[]ghostZleftFace; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } // particles /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - // timeTasks.start_communicate(); + timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -685,14 +686,14 @@ 
void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, double ***vector, i delete[]ghostYleftFace; delete[]ghostZrightFace; delete[]ghostZleftFace; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } // void communicateNodeBoxStencilBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - // timeTasks.start_communicate(); + timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -719,11 +720,11 @@ void communicateNodeBoxStencilBC(int nx, int ny, int nz, double ***vector, int b delete[]ghostYleftFace; delete[]ghostZrightFace; delete[]ghostZleftFace; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - // timeTasks.start_communicate(); + timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -750,15 +751,15 @@ void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, double ***vector, int delete[]ghostYleftFace; delete[]ghostZrightFace; delete[]ghostZleftFace; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } /** SPECIES: communicate ghost cells */ void communicateCenter(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D * vct) { + timeTasks.start_communicate(); - // timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -852,13 +853,13 @@ void communicateCenter(int nx, int ny, int nz, 
double ****vector, int ns, Virtua delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } // /////////// communication + BC //////////////////////////// void communicateCenterBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { + timeTasks.start_communicate(); - // timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -955,13 +956,13 @@ void communicateCenterBC(int nx, int ny, int nz, double ***vector, int bcFaceXri delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } // /////////// communication + BC //////////////////////////// void communicateCenterBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { + timeTasks.start_communicate(); - // timeTasks.start_communicate(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -1058,6 +1059,6 @@ void communicateCenterBC_P(int nx, int ny, int nz, double ***vector, int bcFaceX delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); } diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index bca24228..28de6676 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -1,6 +1,7 @@ #include #include "EMfields3D.h" +#include "TimeTasks.h" /*! 
constructor */ EMfields3D::EMfields3D(Collective * col, Grid * grid) { @@ -1072,6 +1073,8 @@ void EMfields3D::interpDensitiesN2C(VirtualTopology3D * vct, Grid * grid) { /*! communicate ghost for grid -> Particles interpolation */ void EMfields3D::communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, VirtualTopology3D * vct) { // interpolate adding common nodes among processors + timeTasks.start_communicate(); + communicateInterp(nxn, nyn, nzn, ns, rhons, 0, 0, 0, 0, 0, 0, vct); communicateInterp(nxn, nyn, nzn, ns, Jxs, 0, 0, 0, 0, 0, 0, vct); communicateInterp(nxn, nyn, nzn, ns, Jys, 0, 0, 0, 0, 0, 0, vct); @@ -1085,6 +1088,7 @@ void EMfields3D::communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, // calculate the correct densities on the boundaries adjustNonPeriodicDensities(ns, vct); // put the correct values on ghost cells + timeTasks.addto_communicate(); communicateNode_P(nxn, nyn, nzn, rhons, ns, vct); communicateNode_P(nxn, nyn, nzn, Jxs, ns, vct); @@ -1096,7 +1100,6 @@ void EMfields3D::communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, communicateNode_P(nxn, nyn, nzn, pYYsn, ns, vct); communicateNode_P(nxn, nyn, nzn, pYZsn, ns, vct); communicateNode_P(nxn, nyn, nzn, pZZsn, ns, vct); - } diff --git a/iPic3D.cpp b/iPic3D.cpp index 5e31c535..d670264c 100644 --- a/iPic3D.cpp +++ b/iPic3D.cpp @@ -2,7 +2,6 @@ #include #include #include "iPic3D.h" -#include "debug.h" using namespace iPic3D; @@ -12,7 +11,6 @@ int main(int argc, char **argv) { bool b_err = false; MPIdata::init(&argc, &argv); - //dprintf("MPI has been initialized."); KCode.Init(argc, argv); for (int i = KCode.FirstCycle(); i < KCode.LastCycle(); i++) { diff --git a/include/ComNodes3D.h b/include/ComNodes3D.h index 3b465c49..8e1636e5 100644 --- a/include/ComNodes3D.h +++ b/include/ComNodes3D.h @@ -11,9 +11,9 @@ developers : Stefano Markidis, Giovanni Lapenta #define ComNodes_H #include "ComBasic3D.h" -#include "TimeTasks.h" +//#include 
"TimeTasks.h" -extern TimeTasks timeTasks; +//extern TimeTasks timeTasks; // boundary condition for fields #include "BcFields3D.h" diff --git a/include/EMfields3D.h b/include/EMfields3D.h index 23ecdeea..9faca505 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -21,7 +21,7 @@ #include "Collective.h" #include "ComNodes3D.h" #include "ComInterpNodes3D.h" -#include "TimeTasks.h" +//#include "TimeTasks.h" #include "asserts.h" #include "BCStructure.h" diff --git a/include/Particles3D.h b/include/Particles3D.h index 8d701c35..2c178918 100644 --- a/include/Particles3D.h +++ b/include/Particles3D.h @@ -8,7 +8,7 @@ developers: Stefano Markidis, Enrico Camporeale, Giovanni Lapenta, David Burgess #define Part2D_H #include "Particles3Dcomm.h" -#include "TimeTasks.h" +//#include "TimeTasks.h" /** * diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 54e8495a..6cd4531e 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -1,5 +1,6 @@ #include "iPic3D.h" +#include "TimeTasks.h" using namespace iPic3D; MPIdata* iPic3D::c_Solver::mpi=0; @@ -164,15 +165,17 @@ int c_Solver::Init(int argc, char **argv) { void c_Solver::CalculateField() { - // timeTasks.resetCycle(); + timeTasks.resetCycle(); // interpolation - // timeTasks.start(TimeTasks::MOMENTS); + timeTasks.start(TimeTasks::MOMENTS); EMf->updateInfoFields(grid,vct,col); EMf->setZeroDensities(); // set to zero the densities for (int i = 0; i < ns; i++) + { part[i].interpP2G(EMf, grid, vct); // interpolate Particles to Grid(Nodes) + } EMf->sumOverSpecies(vct); // sum all over the species @@ -188,12 +191,12 @@ void c_Solver::CalculateField() { EMf->interpDensitiesN2C(vct, grid); // calculate densities on centers from nodes EMf->calculateHatFunctions(grid, vct); // calculate the hat quantities for the implicit method MPI_Barrier(MPI_COMM_WORLD); - // timeTasks.end(TimeTasks::MOMENTS); + timeTasks.end(TimeTasks::MOMENTS); // MAXWELL'S SOLVER - // timeTasks.start(TimeTasks::FIELDS); + 
timeTasks.start(TimeTasks::FIELDS); EMf->calculateE(grid, vct, col); // calculate the E field - // timeTasks.end(TimeTasks::FIELDS); + timeTasks.end(TimeTasks::FIELDS); } @@ -203,13 +206,13 @@ bool c_Solver::ParticlesMover() { /* Particle mover */ /* -------------- */ - // timeTasks.start(TimeTasks::PARTICLES); + timeTasks.start(TimeTasks::PARTICLES); for (int i = 0; i < ns; i++) // move each species { // #pragma omp task inout(part[i]) in(grid) target_device(booster) mem_avail = part[i].mover_PC(grid, vct, EMf); // use the Predictor Corrector scheme } - // timeTasks.end(TimeTasks::PARTICLES); + timeTasks.end(TimeTasks::PARTICLES); if (mem_avail < 0) { // not enough memory space allocated for particles: stop the simulation if (myrank == 0) { @@ -252,12 +255,12 @@ bool c_Solver::ParticlesMover() { /* This step must be taken out of here! */ /* --------------------- */ - // timeTasks.start(TimeTasks::BFIELD); + timeTasks.start(TimeTasks::BFIELD); EMf->calculateB(grid, vct, col); // calculate the B field - // timeTasks.end(TimeTasks::BFIELD); + timeTasks.end(TimeTasks::BFIELD); // print out total time for all tasks - // timeTasks.print_cycle_times(); + timeTasks.print_cycle_times(); return (false); } diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index a2a7f05c..4dfeb724 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -510,7 +510,7 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { // ********************// // COMMUNICATION // *******************// - // timeTasks.start_communicate(); + timeTasks.start_communicate(); const int avail = communicate(vct); if (avail < 0) return (-1); @@ -523,7 +523,7 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { return (-1); MPI_Barrier(MPI_COMM_WORLD); } - // timeTasks.addto_communicate(); + timeTasks.addto_communicate(); return (0); // exit succcesfully (hopefully) } diff --git a/utility/TimeTasks.cpp b/utility/TimeTasks.cpp 
new file mode 100644 index 00000000..69f56fb7 --- /dev/null +++ b/utility/TimeTasks.cpp @@ -0,0 +1,124 @@ + +#include +#include +#include "TimeTasks.h" +#include "asserts.h" +#include "MPIdata.h" // for get_rank + +/** implementation of declarations in utility/TimeTasks.h **/ + +TimeTasks timeTasks; + +void TimeTasks::resetCycle() +{ + for(int e=0;e Date: Tue, 16 Jul 2013 08:00:23 +0200 Subject: [PATCH 008/118] issue #27: rhocs last dimension is allocated incorrectly in EMfields.cpp --- fields/EMfields3D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 28de6676..1dc44a12 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -119,7 +119,7 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) { Jz_ext = newArr3(double,nxn,nyn,nzn); // involving species rhons = newArr4(double, ns, nxn, nyn, nzn); - rhocs = newArr4(double, ns, nxc, nyc, nzn); + rhocs = newArr4(double, ns, nxc, nyc, nzc); Jxs = newArr4(double, ns, nxn, nyn, nzn); Jys = newArr4(double, ns, nxn, nyn, nzn); Jzs = newArr4(double, ns, nxn, nyn, nzn); From c201c8c3b72622f58818336735ec843fba967cd3 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 10:34:25 +0200 Subject: [PATCH 009/118] issue #35: making MPI_Barrier() a no-op --- communication/ComInterpNodes3D.cpp | 1 + communication/ComNodes3D.cpp | 1 + include/ComParticles3D.h | 1 + include/ipicdefs.h | 3 +++ main/iPic3Dlib.cpp | 1 + particles/Particles3D.cpp | 1 + performances/Timing.cpp | 1 + 7 files changed, 9 insertions(+) diff --git a/communication/ComInterpNodes3D.cpp b/communication/ComInterpNodes3D.cpp index b03b308b..1e24dd73 100644 --- a/communication/ComInterpNodes3D.cpp +++ b/communication/ComInterpNodes3D.cpp @@ -1,5 +1,6 @@ #include "ComInterpNodes3D.h" +#include "ipicdefs.h" /** communicate ghost cells and sum the contribution with a index indicating the number of species*/ void communicateInterp(int nx, int ny, int nz, int ns, double 
****vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { diff --git a/communication/ComNodes3D.cpp b/communication/ComNodes3D.cpp index 30cb5ed8..977494f4 100644 --- a/communication/ComNodes3D.cpp +++ b/communication/ComNodes3D.cpp @@ -1,6 +1,7 @@ #include "ComNodes3D.h" #include "TimeTasks.h" +#include "ipicdefs.h" /** communicate ghost cells (FOR NODES) */ void communicateNode(int nx, int ny, int nz, double ***vector, VirtualTopology3D * vct) { diff --git a/include/ComParticles3D.h b/include/ComParticles3D.h index da7c3470..76090d45 100644 --- a/include/ComParticles3D.h +++ b/include/ComParticles3D.h @@ -10,6 +10,7 @@ developers : Stefano Markidis, Giovanni Lapenta #define ComParticles3D_H #include "MPIdata.h" +#include "ipicdefs.h" #include "ComBasic3D.h" /** comunicate particles and receive particles to and from 6 processors */ diff --git a/include/ipicdefs.h b/include/ipicdefs.h index d9b7720c..dfa44969 100644 --- a/include/ipicdefs.h +++ b/include/ipicdefs.h @@ -4,4 +4,7 @@ // uncomment the following line to use parallel hdf5 //#define USING_PARALLEL_HDF5 +// use precprocessor to remove MPI_Barrier() calls. +#define MPI_Barrier(args...) 
+ #endif diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 6cd4531e..418fc8ce 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -1,6 +1,7 @@ #include "iPic3D.h" #include "TimeTasks.h" +#include "ipicdefs.h" using namespace iPic3D; MPIdata* iPic3D::c_Solver::mpi=0; diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 4dfeb724..357f113a 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -19,6 +19,7 @@ developers: Stefano Markidis, Giovanni Lapenta #include "Grid3DCU.h" #include "Field.h" #include "MPIdata.h" +#include "ipicdefs.h" #include "TimeTasks.h" #include "Particles3D.h" diff --git a/performances/Timing.cpp b/performances/Timing.cpp index 6b3eec48..e639fd88 100644 --- a/performances/Timing.cpp +++ b/performances/Timing.cpp @@ -1,5 +1,6 @@ #include "Timing.h" +#include "ipicdefs.h" /** * From 7e4d4d1b6bc04aae6bb92c594443d6b37a22daeb Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 11:45:40 +0200 Subject: [PATCH 010/118] issue #36: use int rather than long long in critical particle loops --- particles/Particles3D.cpp | 5 ++++- particles/Particles3Dcomm.cpp | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 357f113a..397a1c62 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -8,6 +8,8 @@ developers: Stefano Markidis, Giovanni Lapenta #include #include #include +#include +#include "asserts.h" #include "VirtualTopology3D.h" #include "VCtopology3D.h" @@ -329,12 +331,13 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { const double dto2 = .5 * dt, qomdt2 = qom * dto2 / c; const double inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; + assert_le(nop,INT_MAX); // else would need to use long long // don't bother trying to push any particles simultaneously; // MIC already does vectorization automatically, and trying // to do it by hand only hurts 
performance. #pragma omp parallel for #pragma simd // this just slows things down (why?) - for (long long rest = 0; rest < nop; rest++) { + for (int rest = 0; rest < nop; rest++) { // copy the particle double xp = x[rest]; double yp = y[rest]; diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index db0ec204..1a799dd7 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -7,6 +7,8 @@ developers: Stefano Markidis, Giovanni Lapenta. #include #include #include +#include +#include "asserts.h" #include "VirtualTopology3D.h" #include "VCtopology3D.h" #include "CollectiveIO.h" @@ -305,7 +307,8 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc //Moments speciesMoments(nxn,nyn,nzn,invVOL); //speciesMoments.set_to_zero(); //#pragma omp for - for (register long long i = 0; i < nop; i++) + assert_le(nop,INT_MAX); // else would need to use long long + for (int i = 0; i < nop; i++) { const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); const int iy = 2 + int (floor((y[i] - ystart) * inv_dy)); From 23f510cba5e2b162285efe7ffb51293135b8b9bd Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 19:51:43 +0200 Subject: [PATCH 011/118] fixed typo in TimeTask output introduced in commit 6c903629 --- utility/TimeTasks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utility/TimeTasks.cpp b/utility/TimeTasks.cpp index 69f56fb7..b48bb070 100644 --- a/utility/TimeTasks.cpp +++ b/utility/TimeTasks.cpp @@ -59,7 +59,7 @@ void TimeTasks::print_cycle_times() if(!MPIdata::get_rank()) { fflush(stdout); - fprintf(stdout,"=== times for cycle for rank %d) === \n", + fprintf(stdout,"=== times for cycle for rank %d === \n", MPIdata::get_rank()); fprintf(stdout, TIMING_PREFIX "moms flds pcls Bfld cycl\n"); From 3687a02412be2a660aab226a059a86f6457cfb2e Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 19:39:25 +0200 Subject: [PATCH 012/118] Local omp.h was conflicting 
with system omp.h --- include/ompdefs.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/ompdefs.h diff --git a/include/ompdefs.h b/include/ompdefs.h new file mode 100644 index 00000000..5c5fab00 --- /dev/null +++ b/include/ompdefs.h @@ -0,0 +1,25 @@ +#ifndef ompdefs_H +#define ompdefs_H + +#include +#include "asserts.h" +// the compiler sets _OPENMP if the -openmp flag is used +#ifdef _OPENMP +#include +#else +inline int omp_get_thread_num() { + return 0; +} +#endif + +inline int omp_thread_count() { + int n = 0; + #pragma omp parallel reduction(+:n) + n += 1; + #ifndef _OPENMP // USING_OMP + assert_eq(n,1); + #endif + return n; +} + +#endif From bb6ec5f19aa1ce565e8939500b268d0eb029452d Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 19:36:55 +0200 Subject: [PATCH 013/118] issue #33: making grid accessors inline again --- grids/Grid3DCU.cpp | 114 --------------------------------------------- include/Grid3DCU.h | 71 ++++++++++------------------ 2 files changed, 24 insertions(+), 161 deletions(-) diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index c2b92797..973b20f8 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -413,117 +413,3 @@ void Grid3DCU::interpN2C(double ****vecFieldC, int ns, double ****vecFieldN) { } -/** get nxc */ -int Grid3DCU::getNXC() { - return (nxc); -} - -/** get nxn */ -int Grid3DCU::getNXN() { - return (nxn); -} - -/** get nyc */ -int Grid3DCU::getNYC() { - return (nyc); -} - -/** get nyn */ -int Grid3DCU::getNYN() { - return (nyn); -} - -/** get nzc */ -int Grid3DCU::getNZC() { - return (nzc); -} - -/** get nzn */ -int Grid3DCU::getNZN() { - return (nzn); -} - -/** get dx */ -double Grid3DCU::getDX() { - return (dx); -} - -/** get dy */ -double Grid3DCU::getDY() { - return (dy); -} - -/** get dz */ -double Grid3DCU::getDZ() { - return (dz); -} - -/** get xn[][][] */ -double &Grid3DCU::getXN(int indexX, int indexY, int indexZ) { - return 
(node_coordinate[indexX][indexY][indexZ][0]); -} - -/** get yn[][][] */ -double &Grid3DCU::getYN(int indexX, int indexY, int indexZ) { - return (node_coordinate[indexX][indexY][indexZ][1]); -} - -/** get zn[][][] */ -double &Grid3DCU::getZN(int indexX, int indexY, int indexZ) { - return (node_coordinate[indexX][indexY][indexZ][2]); -} - -/** get xc[][][] */ -double &Grid3DCU::getXC(int indexX, int indexY, int indexZ) { - return (center_coordinate[indexX][indexY][indexZ][0]); -} - -/** get yc[][][] */ -double &Grid3DCU::getYC(int indexX, int indexY, int indexZ) { - return (center_coordinate[indexX][indexY][indexZ][1]); -} - -/** get zc[][][] */ -double &Grid3DCU::getZC(int indexX, int indexY, int indexZ) { - return (center_coordinate[indexX][indexY][indexZ][2]); -} - -/** get the whole vector of nodes*/ -double ****Grid3DCU::getN() { - return node_coordinate; -} - -/** get Xstart */ -double Grid3DCU::getXstart() { - return (xStart); -} - -/** get Xend */ -double Grid3DCU::getXend() { - return (xEnd); -} - -/** get Ystart */ -double Grid3DCU::getYstart() { - return (yStart); -} - -/** get Yend */ -double Grid3DCU::getYend() { - return (yEnd); -} - -/** get Zstart */ -double Grid3DCU::getZstart() { - return (zStart); -} - -/** get Zend */ -double Grid3DCU::getZend() { - return (zEnd); -} - -/** get the inverse of volume */ -double Grid3DCU::getInvVOL() { - return (invVOL); -} diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index 819c041e..f085ea34 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -81,53 +81,6 @@ class Grid3DCU // :public Grid /** interpolate on central points from nodes */ void interpN2C(double ****vecFieldC, int ns, double ****vecFieldN); - /** return nxc */ - int getNXC(); - /** return nxn */ - int getNXN(); - /** return nyc */ - int getNYC(); - /** return nyn */ - int getNYN(); - /** return nzc */ - int getNZC(); - /** return nzn */ - int getNZN(); - /** return dx */ - double getDX(); - /** return dy */ - double getDY(); - /** return 
dz */ - double getDZ(); - /** get xn(X,Y,Z) */ - double &getXN(int indexX, int indexY, int indexZ); - /** get yn(X,Y,Z) */ - double &getYN(int indexX, int indexY, int indexZ); - /** get zn(X,Y,Z) */ - double &getZN(int indexX, int indexY, int indexZ); - /** get the whole vector of nodes*/ - double ****getN(); - /** get xc(X,Y,Z) */ - double &getXC(int indexX, int indexY, int indexZ); - /** get yc(X,Y,Z) */ - double &getYC(int indexX, int indexY, int indexZ); - /** get zc(X,Y,Z) */ - double &getZC(int indexX, int indexY, int indexZ); - /** get Xstart */ - double getXstart(); - /** get Xend */ - double getXend(); - /** get Ystart */ - double getYstart(); - /** get Yend */ - double getYend(); - /** get Zstart */ - double getZstart(); - /** get Zend */ - double getZend(); - /** get the inverse of volume */ - double getInvVOL(); - // /////////// PRIVATE VARIABLES ////////////// private: /** number of cells - X direction, including + 2 (guard cells) */ @@ -163,6 +116,30 @@ class Grid3DCU // :public Grid /** local grid boundaries coordinate */ double xStart, xEnd, yStart, yEnd, zStart, zEnd; +public: // accessors (inline) + int getNXC() { return (nxc); } + int getNXN() { return (nxn); } + int getNYC() { return (nyc); } + int getNYN() { return (nyn); } + int getNZC() { return (nzc); } + int getNZN() { return (nzn); } + double getDX() { return (dx); } + double getDY() { return (dy); } + double getDZ() { return (dz); } + double &getXN(int X, int Y, int Z) { return (node_coordinate[X][Y][Z][0]); } + double &getYN(int X, int Y, int Z) { return (node_coordinate[X][Y][Z][1]); } + double &getZN(int X, int Y, int Z) { return (node_coordinate[X][Y][Z][2]); } + double &getXC(int X, int Y, int Z) { return (center_coordinate[X][Y][Z][0]); } + double &getYC(int X, int Y, int Z) { return (center_coordinate[X][Y][Z][1]); } + double &getZC(int X, int Y, int Z) { return (center_coordinate[X][Y][Z][2]); } + double ****getN() { return node_coordinate; } + double getXstart() { return 
(xStart); } + double getXend() { return (xEnd); } + double getYstart() { return (yStart); } + double getYend() { return (yEnd); } + double getZstart() { return (zStart); } + double getZend() { return (zEnd); } + double getInvVOL() { return (invVOL); } }; typedef Grid3DCU Grid; From 2bcd7506d85a4374abe068152133ba409e1d813b Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 16:03:24 +0200 Subject: [PATCH 014/118] issues #23 and issue #33: using one preallocated Moments instance per thread --- fields/EMfields3D.cpp | 180 +++------------------------- fields/Moments.cpp | 53 +++++++++ include/EMfields3D.h | 216 +++++++++++++--------------------- include/Moments.h | 182 ++++++++++++++++++++++++++++ include/ipicdefs.h | 3 + particles/Particles3Dcomm.cpp | 36 +++--- 6 files changed, 355 insertions(+), 315 deletions(-) create mode 100644 fields/Moments.cpp create mode 100644 include/Moments.h diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 1dc44a12..c6c5936d 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -2,6 +2,8 @@ #include #include "EMfields3D.h" #include "TimeTasks.h" +#include "Moments.h" +#include "ompdefs.h" /*! constructor */ EMfields3D::EMfields3D(Collective * col, Grid * grid) { @@ -162,6 +164,13 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) { vectY = newArr3(double, nxn, nyn, nzn); vectZ = newArr3(double, nxn, nyn, nzn); divC = newArr3(double, nxc, nyc, nzc); + sizeMomentsArray = omp_thread_count(); + momentsArray = new Moments*[sizeMomentsArray]; + for(int i=0;iinit(nxn,nyn,nzn,invVOL); + } } /*! 
Calculate Electric field with the implicit solver: the Maxwell solver method is called here */ @@ -1102,98 +1111,7 @@ void EMfields3D::communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, communicateNode_P(nxn, nyn, nzn, pZZsn, ns, vct); } - -/** add an amount of charge density to charge density field at node X,Y */ -void Moments::addRho(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - rho[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of charge density to current density - direction X to current density field on the node*/ -void Moments::addJx(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - Jx[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of current density - direction Y to current density field on the node */ -void Moments::addJy(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - Jy[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of current density - direction Z to current density field on the node */ -void Moments::addJz(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - Jz[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of pressure density - direction XX to current density field on the node */ -void Moments::addPxx(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - pXX[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of pressure 
density - direction XY to current density field on the node*/ -void Moments::addPxy(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - pXY[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of pressure density - direction XZ to current density field on the node */ -void Moments::addPxz(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - pXZ[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of pressure density - direction YY to current density field on the node*/ -void Moments::addPyy(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - pYY[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of pressure density - direction YZ to current density field on the node */ -void Moments::addPyz(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - pYZ[X - i][Y - j][Z - k] += temp; - } -} -/** add an amount of pressure density - direction ZZ to current density field on the node */ -void Moments::addPzz(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - pZZ[X - i][Y - j][Z - k] += temp; - } -} - +/* add moments (e.g. from an OpenMP thread) to the accumulated moments */ void EMfields3D::addToSpeciesMoments(const Moments & in, int is) { assert_eq(in.get_nx(), nxn); assert_eq(in.get_ny(), nyn); @@ -1215,79 +1133,6 @@ void EMfields3D::addToSpeciesMoments(const Moments & in, int is) { } } -/*! 
add an amount of charge density to charge density field at node X,Y */ -void EMfields3D::addRho(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - rhons[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! add an amount of charge density to current density - direction X to current density field on the node */ -void EMfields3D::addJx(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - Jxs[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! add an amount of current density - direction Y to current density field on the node */ -void EMfields3D::addJy(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - Jys[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! add an amount of current density - direction Z to current density field on the node */ -void EMfields3D::addJz(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - Jzs[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! add an amount of pressure density - direction XX to current density field on the node */ -void EMfields3D::addPxx(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - pXXsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! add an amount of pressure density - direction XY to current density field on the node */ -void EMfields3D::addPxy(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - pXYsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! 
add an amount of pressure density - direction XZ to current density field on the node */ -void EMfields3D::addPxz(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - pXZsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! add an amount of pressure density - direction YY to current density field on the node */ -void EMfields3D::addPyy(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - pYYsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! add an amount of pressure density - direction YZ to current density field on the node */ -void EMfields3D::addPyz(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - pYZsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} -/*! add an amount of pressure density - direction ZZ to current density field on the node */ -void EMfields3D::addPzz(double weight[][2][2], int X, int Y, int Z, int is) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - pZZsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; -} - - - /*! set to 0 all the densities fields */ void EMfields3D::setZeroDensities() { for (register int i = 0; i < nxn; i++) @@ -3362,4 +3207,9 @@ EMfields3D::~EMfields3D() { delArr3(vectY, nxn, nyn); delArr3(vectZ, nxn, nyn); delArr3(divC, nxc, nyc); + for(int i=0;i #include @@ -31,140 +30,7 @@ using std::endl; /*! Electromagnetic fields and sources defined for each local grid, and for an implicit maxwell's solver @date May 2008 @par Copyright: (C) 2008 KUL @author Stefano Markidis, Giovanni Lapenta. 
@version 3.0 */ -// class to accumulate node-centered species moments -// -class Moments { - private: - double invVOL; - double ***rho; - - /** current density, defined on nodes */ - double ***Jx; - double ***Jy; - double ***Jz; - - /** pressure tensor components, defined on nodes */ - double ***pXX; - double ***pXY; - double ***pXZ; - double ***pYY; - double ***pYZ; - double ***pZZ; - int nx; - int ny; - int nz; - public: - int get_nx() const { - return nx; - } - int get_ny() const { - return ny; - } - int get_nz() const { - return nz; - } - double get_invVOL() const { - return invVOL; - } - double get_rho(int i, int j, int k) const { - return rho[i][j][k]; - } - double get_Jx(int i, int j, int k) const { - return Jx[i][j][k]; - } - double get_Jy(int i, int j, int k) const { - return Jy[i][j][k]; - } - double get_Jz(int i, int j, int k) const { - return Jz[i][j][k]; - } - double get_pXX(int i, int j, int k) const { - return pXX[i][j][k]; - } - double get_pXY(int i, int j, int k) const { - return pXY[i][j][k]; - } - double get_pXZ(int i, int j, int k) const { - return pXZ[i][j][k]; - } - double get_pYY(int i, int j, int k) const { - return pYY[i][j][k]; - } - double get_pYZ(int i, int j, int k) const { - return pYZ[i][j][k]; - } - double get_pZZ(int i, int j, int k) const { - return pZZ[i][j][k]; - } - public: - Moments() { - }; - Moments(int nx_, int ny_, int nz_, double invVOL_); - ~Moments(); - void set_to_zero(); - void addRho(double weight[][2][2], int X, int Y, int Z); - void addJx(double weight[][2][2], int X, int Y, int Z); - void addJy(double weight[][2][2], int X, int Y, int Z); - void addJz(double weight[][2][2], int X, int Y, int Z); - - void addPxx(double weight[][2][2], int X, int Y, int Z); - void addPxy(double weight[][2][2], int X, int Y, int Z); - void addPxz(double weight[][2][2], int X, int Y, int Z); - void addPyy(double weight[][2][2], int X, int Y, int Z); - void addPyz(double weight[][2][2], int X, int Y, int Z); - void addPzz(double 
weight[][2][2], int X, int Y, int Z); -}; - -// construct empty instance (not zeroed) -inline Moments::Moments(int nx_, int ny_, int nz_, double invVOL_) { - nx = nx_; - ny = ny_; - nz = nz_; - invVOL = invVOL_; - rho = newArr3(double, nx, ny, nz); - Jx = newArr3(double, nx, ny, nz); - Jy = newArr3(double, nx, ny, nz); - Jz = newArr3(double, nx, ny, nz); - pXX = newArr3(double, nx, ny, nz); - pXY = newArr3(double, nx, ny, nz); - pXZ = newArr3(double, nx, ny, nz); - pYY = newArr3(double, nx, ny, nz); - pYZ = newArr3(double, nx, ny, nz); - pZZ = newArr3(double, nx, ny, nz); -} - -inline Moments::~Moments() { - // nodes and species - delArr3(rho, nx, ny); - delArr3(Jx, nx, ny); - delArr3(Jy, nx, ny); - delArr3(Jz, nx, ny); - delArr3(pXX, nx, ny); - delArr3(pXY, nx, ny); - delArr3(pXZ, nx, ny); - delArr3(pYY, nx, ny); - delArr3(pYZ, nx, ny); - delArr3(pZZ, nx, ny); -} - -inline void Moments::set_to_zero() { - // #pragma omp parallel for collapse(1) - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) { - rho[i][j][k] = 0.0; - Jx[i][j][k] = 0.0; - Jy[i][j][k] = 0.0; - Jz[i][j][k] = 0.0; - pXX[i][j][k] = 0.0; - pXY[i][j][k] = 0.0; - pXZ[i][j][k] = 0.0; - pYY[i][j][k] = 0.0; - pYZ[i][j][k] = 0.0; - pZZ[i][j][k] = 0.0; - } -} - +class Moments; class EMfields3D // :public Field { public: @@ -410,6 +276,12 @@ class EMfields3D // :public Field /*! get the magnetic field energy */ double getBenergy(); + /*! fetch array for summing moments of thread i */ + Moments& fetch_momentsArray(int i){ + assert_le(0,i); + assert_le(i,sizeMomentsArray); + return *momentsArray[i]; + } /*! 
print electromagnetic fields info */ void print(void) const; @@ -531,6 +403,9 @@ class EMfields3D // :public Field double ***vectY; double ***vectZ; double ***divC; + /* temporary arrays for summing moments */ + int sizeMomentsArray; + Moments **momentsArray; // ******************************************************************************* @@ -658,6 +533,77 @@ class EMfields3D // :public Field }; +inline void EMfields3D::addRho(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + rhons[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! add an amount of charge density to current density - direction X to current density field on the node */ +inline void EMfields3D::addJx(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + Jxs[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! add an amount of current density - direction Y to current density field on the node */ +inline void EMfields3D::addJy(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + Jys[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! add an amount of current density - direction Z to current density field on the node */ +inline void EMfields3D::addJz(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + Jzs[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! add an amount of pressure density - direction XX to current density field on the node */ +inline void EMfields3D::addPxx(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + pXXsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! 
add an amount of pressure density - direction XY to current density field on the node */ +inline void EMfields3D::addPxy(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + pXYsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! add an amount of pressure density - direction XZ to current density field on the node */ +inline void EMfields3D::addPxz(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + pXZsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! add an amount of pressure density - direction YY to current density field on the node */ +inline void EMfields3D::addPyy(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + pYYsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! add an amount of pressure density - direction YZ to current density field on the node */ +inline void EMfields3D::addPyz(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + pYZsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} +/*! 
add an amount of pressure density - direction ZZ to current density field on the node */ +inline void EMfields3D::addPzz(double weight[][2][2], int X, int Y, int Z, int is) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) + pZZsn[is][X - i][Y - j][Z - k] += weight[i][j][k] * invVOL; +} + + typedef EMfields3D Field; #endif diff --git a/include/Moments.h b/include/Moments.h new file mode 100644 index 00000000..d4349ecc --- /dev/null +++ b/include/Moments.h @@ -0,0 +1,182 @@ +#ifndef Moments_H +#define Moments_H + +// class to accumulate node-centered species moments +// +class Moments { + private: + double invVOL; + double ***rho; + + /** current density, defined on nodes */ + double ***Jx; + double ***Jy; + double ***Jz; + + /** pressure tensor components, defined on nodes */ + double ***pXX; + double ***pXY; + double ***pXZ; + double ***pYY; + double ***pYZ; + double ***pZZ; + int nx; + int ny; + int nz; + public: + int get_nx() const { + return nx; + } + int get_ny() const { + return ny; + } + int get_nz() const { + return nz; + } + double get_invVOL() const { + return invVOL; + } + double get_rho(int i, int j, int k) const { + return rho[i][j][k]; + } + double get_Jx(int i, int j, int k) const { + return Jx[i][j][k]; + } + double get_Jy(int i, int j, int k) const { + return Jy[i][j][k]; + } + double get_Jz(int i, int j, int k) const { + return Jz[i][j][k]; + } + double get_pXX(int i, int j, int k) const { + return pXX[i][j][k]; + } + double get_pXY(int i, int j, int k) const { + return pXY[i][j][k]; + } + double get_pXZ(int i, int j, int k) const { + return pXZ[i][j][k]; + } + double get_pYY(int i, int j, int k) const { + return pYY[i][j][k]; + } + double get_pYZ(int i, int j, int k) const { + return pYZ[i][j][k]; + } + double get_pZZ(int i, int j, int k) const { + return pZZ[i][j][k]; + } + public: + Moments() { + }; + Moments(int nx_, int ny_, int nz_, double invVOL_){ + init(nx_,ny_,nz_,invVOL_); + } + void init(int 
nx_, int ny_, int nz_, double invVOL_); + ~Moments(); + void set_to_zero(); + void addRho(double weight[][2][2], int X, int Y, int Z); + void addJx(double weight[][2][2], int X, int Y, int Z); + void addJy(double weight[][2][2], int X, int Y, int Z); + void addJz(double weight[][2][2], int X, int Y, int Z); + + void addPxx(double weight[][2][2], int X, int Y, int Z); + void addPxy(double weight[][2][2], int X, int Y, int Z); + void addPxz(double weight[][2][2], int X, int Y, int Z); + void addPyy(double weight[][2][2], int X, int Y, int Z); + void addPyz(double weight[][2][2], int X, int Y, int Z); + void addPzz(double weight[][2][2], int X, int Y, int Z); +}; + +/** add an amount of charge density to charge density field at node X,Y */ +inline void Moments::addRho(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++) { + const double temp = weight[i][j][k] * invVOL; + rho[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of charge density to current density - direction X to current density field on the node*/ +inline void Moments::addJx(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = weight[i][j][k] * invVOL; + Jx[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of current density - direction Y to current density field on the node */ +inline void Moments::addJy(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = weight[i][j][k] * invVOL; + Jy[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of current density - direction Z to current density field on the node */ +inline void Moments::addJz(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = 
weight[i][j][k] * invVOL; + Jz[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of pressure density - direction XX to current density field on the node */ +inline void Moments::addPxx(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = weight[i][j][k] * invVOL; + pXX[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of pressure density - direction XY to current density field on the node*/ +inline void Moments::addPxy(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = weight[i][j][k] * invVOL; + pXY[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of pressure density - direction XZ to current density field on the node */ +inline void Moments::addPxz(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = weight[i][j][k] * invVOL; + pXZ[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of pressure density - direction YY to current density field on the node*/ +inline void Moments::addPyy(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = weight[i][j][k] * invVOL; + pYY[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of pressure density - direction YZ to current density field on the node */ +inline void Moments::addPyz(double weight[][2][2], int X, int Y, int Z) { + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = weight[i][j][k] * invVOL; + pYZ[X - i][Y - j][Z - k] += temp; + } +} +/** add an amount of pressure density - direction ZZ to current density field on the node */ +inline void Moments::addPzz(double weight[][2][2], int X, int Y, int Z) { + for 
(int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 2; k++){ + const double temp = weight[i][j][k] * invVOL; + pZZ[X - i][Y - j][Z - k] += temp; + } +} + +#endif diff --git a/include/ipicdefs.h b/include/ipicdefs.h index dfa44969..ef86aaf1 100644 --- a/include/ipicdefs.h +++ b/include/ipicdefs.h @@ -1,6 +1,9 @@ #ifndef __IPIC_DEFS_H__ #define __IPIC_DEFS_H__ +// comment this out if OpenMP is not installed on your system. +#define USING_OMP + // uncomment the following line to use parallel hdf5 //#define USING_PARALLEL_HDF5 diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 1a799dd7..5d1be951 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -20,13 +20,16 @@ developers: Stefano Markidis, Giovanni Lapenta. #include "Grid.h" #include "Grid3DCU.h" #include "Field.h" +#include "Moments.h" #include "MPIdata.h" +#include "ompdefs.h" #include "Particles3Dcomm.h" #include "hdf5.h" #include #include +#include "debug.h" using std::cout; using std::cerr; @@ -302,11 +305,14 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc const double nxn = grid->getNXN(); const double nyn = grid->getNYN(); const double nzn = grid->getNZN(); - //#pragma omp parallel + #pragma omp parallel { + dprint(omp_get_thread_num()); + Moments& speciesMoments = EMf->fetch_momentsArray(omp_get_thread_num()); //Moments speciesMoments(nxn,nyn,nzn,invVOL); - //speciesMoments.set_to_zero(); - //#pragma omp for + //Field& speciesMoments = *EMf; + speciesMoments.set_to_zero(); + #pragma omp for assert_le(nop,INT_MAX); // else would need to use long long for (int i = 0; i < nop; i++) { @@ -336,65 +342,65 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc //weight[1][1][0] = q[i] * xi[1] * eta[1] * zeta[0] * invVOL; //weight[1][1][1] = q[i] * xi[1] * eta[1] * zeta[1] * invVOL; // add charge density - EMf->addRho(weight, ix, iy, iz, ns); + 
speciesMoments.addRho(weight, ix, iy, iz); // add current density - X for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = u[i] * weight[ii][jj][kk]; - EMf->addJx(temp, ix, iy, iz, ns); + speciesMoments.addJx(temp, ix, iy, iz); // add current density - Y for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = v[i] * weight[ii][jj][kk]; - EMf->addJy(temp, ix, iy, iz, ns); + speciesMoments.addJy(temp, ix, iy, iz); // add current density - Z for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = w[i] * weight[ii][jj][kk]; - EMf->addJz(temp, ix, iy, iz, ns); + speciesMoments.addJz(temp, ix, iy, iz); // Pxx - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = u[i] * u[i] * weight[ii][jj][kk]; - EMf->addPxx(temp, ix, iy, iz, ns); + speciesMoments.addPxx(temp, ix, iy, iz); // Pxy - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = u[i] * v[i] * weight[ii][jj][kk]; - EMf->addPxy(temp, ix, iy, iz, ns); + speciesMoments.addPxy(temp, ix, iy, iz); // Pxz - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = u[i] * w[i] * weight[ii][jj][kk]; - EMf->addPxz(temp, ix, iy, iz, ns); + speciesMoments.addPxz(temp, ix, iy, iz); // Pyy - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = v[i] * v[i] * weight[ii][jj][kk]; - EMf->addPyy(temp, ix, iy, iz, ns); + speciesMoments.addPyy(temp, ix, iy, iz); // Pyz - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = v[i] * w[i] * weight[ii][jj][kk]; - EMf->addPyz(temp, ix, iy, 
iz, ns); + speciesMoments.addPyz(temp, ix, iy, iz); // Pzz - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = w[i] * w[i] * weight[ii][jj][kk]; - EMf->addPzz(temp, ix, iy, iz, ns); + speciesMoments.addPzz(temp, ix, iy, iz); } // change this to allow more parallelization after implementing array class - //#pragma omp critical - //EMf->addToSpeciesMoments(speciesMoments,ns); + #pragma omp critical + EMf->addToSpeciesMoments(speciesMoments,ns); } // communicate contribution from ghost cells EMf->communicateGhostP2G(ns, 0, 0, 0, 0, vct); From 4cbe8208a61a3797ac57266c670d40c535803a44 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 20:07:04 +0200 Subject: [PATCH 015/118] issue #23 and #33: accumulating Moments using multiple instances for OpenMP Using a grid-sized instance of Moments for every OpenMP thread will consume too much memory if a very large number of OpenMP threads are used and so is something of a stop-gap. But the current serial code would be unacceptably slow in that case, so this shouldn't really break anything. EMfields should be redesigned to contain a set of Moments instances to be passed from the particle solver to the field solver. 
--- particles/Particles3Dcomm.cpp | 93 +++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 5d1be951..d339894d 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -305,15 +305,19 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc const double nxn = grid->getNXN(); const double nyn = grid->getNYN(); const double nzn = grid->getNZN(); + assert_le(nop,INT_MAX); // else would need to use long long + // to make memory use scale to a large number of threads we + // could first apply an efficient parallel sorting algorithm + // to the particles and then accumulate moments in smaller + // subarrays. + //#ifdef _OPENMP #pragma omp parallel { - dprint(omp_get_thread_num()); - Moments& speciesMoments = EMf->fetch_momentsArray(omp_get_thread_num()); + int thread_num = omp_get_thread_num(); + Moments& speciesMoments = EMf->fetch_momentsArray(thread_num); //Moments speciesMoments(nxn,nyn,nzn,invVOL); - //Field& speciesMoments = *EMf; speciesMoments.set_to_zero(); #pragma omp for - assert_le(nop,INT_MAX); // else would need to use long long for (int i = 0; i < nop; i++) { const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); @@ -402,6 +406,87 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc #pragma omp critical EMf->addToSpeciesMoments(speciesMoments,ns); } + //#else + //{ + // assert_le(nop,INT_MAX); // else would need to use long long + // for (int i = 0; i < nop; i++) + // { + // const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); + // const int iy = 2 + int (floor((y[i] - ystart) * inv_dy)); + // const int iz = 2 + int (floor((z[i] - zstart) * inv_dz)); + // double temp[2][2][2]; + // double xi[2], eta[2], zeta[2]; + // xi[0] = x[i] - grid->getXN(ix - 1, iy, iz); + // eta[0] = y[i] - grid->getYN(ix, iy - 1, iz); + // zeta[0] = z[i] - grid->getZN(ix, iy, iz - 
1); + // xi[1] = grid->getXN(ix, iy, iz) - x[i]; + // eta[1] = grid->getYN(ix, iy, iz) - y[i]; + // zeta[1] = grid->getZN(ix, iy, iz) - z[i]; + // double weight[2][2][2]; + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) { + // weight[ii][jj][kk] = q[i] * xi[ii] * eta[jj] * zeta[kk] * invVOL; + // } + // // add charge density + // EMf->addRho(weight, ix, iy, iz, ns); + // // add current density - X + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // temp[ii][jj][kk] = u[i] * weight[ii][jj][kk]; + // EMf->addJx(temp, ix, iy, iz, ns); + // // add current density - Y + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // temp[ii][jj][kk] = v[i] * weight[ii][jj][kk]; + // EMf->addJy(temp, ix, iy, iz, ns); + // // add current density - Z + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // temp[ii][jj][kk] = w[i] * weight[ii][jj][kk]; + // EMf->addJz(temp, ix, iy, iz, ns); + // // Pxx - add pressure tensor + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // temp[ii][jj][kk] = u[i] * u[i] * weight[ii][jj][kk]; + // EMf->addPxx(temp, ix, iy, iz, ns); + // // Pxy - add pressure tensor + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // temp[ii][jj][kk] = u[i] * v[i] * weight[ii][jj][kk]; + // EMf->addPxy(temp, ix, iy, iz, ns); + // // Pxz - add pressure tensor + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // temp[ii][jj][kk] = u[i] * w[i] * weight[ii][jj][kk]; + // EMf->addPxz(temp, ix, iy, iz, ns); + // // Pyy - add pressure tensor + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // 
temp[ii][jj][kk] = v[i] * v[i] * weight[ii][jj][kk]; + // EMf->addPyy(temp, ix, iy, iz, ns); + // // Pyz - add pressure tensor + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // temp[ii][jj][kk] = v[i] * w[i] * weight[ii][jj][kk]; + // EMf->addPyz(temp, ix, iy, iz, ns); + // // Pzz - add pressure tensor + // for (int ii = 0; ii < 2; ii++) + // for (int jj = 0; jj < 2; jj++) + // for (int kk = 0; kk < 2; kk++) + // temp[ii][jj][kk] = w[i] * w[i] * weight[ii][jj][kk]; + // EMf->addPzz(temp, ix, iy, iz, ns); + // } + //} + //#endif // communicate contribution from ghost cells EMf->communicateGhostP2G(ns, 0, 0, 0, 0, vct); } From 499c8bfddb71c3e8e18e2971281907edd1727bd7 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 16 Jul 2013 19:48:13 +0200 Subject: [PATCH 016/118] issue #33: eliminating iostream header from asserts.h --- include/asserts.h | 1 - utility/asserts.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asserts.h b/include/asserts.h index 962095e7..a3cd4584 100644 --- a/include/asserts.h +++ b/include/asserts.h @@ -1,7 +1,6 @@ #ifndef __ASSERTS_H__ #define __ASSERTS_H__ -#include #include #include diff --git a/utility/asserts.cpp b/utility/asserts.cpp index c5beed50..576944f4 100644 --- a/utility/asserts.cpp +++ b/utility/asserts.cpp @@ -1,4 +1,5 @@ +#include #include "asserts.h" void assert_error(const char *file, int line, const char *func, const char *op, const char *lhs_str, const char *rhs_str, double lhs, double rhs) { From cdc05b344bbc02865ddee8e7a42aff6e6cbba709 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 22 Jul 2013 11:33:02 +0200 Subject: [PATCH 017/118] issue #39: removed asgArr3 and asgArr4 --- include/Alloc.h | 73 --------------------------------------- particles/Particles3D.cpp | 24 ++++++------- 2 files changed, 12 insertions(+), 85 deletions(-) diff --git a/include/Alloc.h b/include/Alloc.h index 3a0c7173..a57b9b70 100644 --- a/include/Alloc.h 
+++ b/include/Alloc.h @@ -32,34 +32,6 @@ template < class type > type **** _new_4_array(int sz1, int sz2, int sz3, int sz return result; } -/*! The assigment for 4D array */ -template < class type > type **** _assign_4_array(int sz1, int sz2, int sz3, int sz4, type **** org) { - - type ****all_x; - type ***all_y; - type **all_z; - type *all_r; - - all_x = org; - all_y = org[0]; - all_z = org[0][0]; - all_r = org[0][0][0]; - - type ****result = all_x; - - for (int i = 0; i < sz1; i++, all_y += sz2) { - result[i] = all_y; - for (int j = 0; j < sz2; j++, all_z += sz3) { - result[i][j] = all_z; - for (int k = 0; k < sz3; k++, all_r += sz4) { - result[i][j][k] = all_r; - } - } - } - - return result; -} - /*! Deallocator for 4D arrays */ template < class type > void delArr4(type **** arr, int dummyx, int dummyy, int dummyz) { delete[]arr[0][0][0]; @@ -92,29 +64,6 @@ template < class type > type *** _new_3_array(int sz1, int sz2, int sz3) { } -/*! The assignment for 3D array */ -template < class type > type *** _assign_3_array(int sz1, int sz2, int sz3, type *** org) { - - type ***all_x; - type **all_y; - type *all_z; - - all_x = org; - all_y = org[0]; - all_z = org[0][0]; - - type ***result = org; - - for (int i = 0; i < sz1; i++, all_y += sz2) { - result[i] = all_y; - for (int j = 0; j < sz2; j++, all_z += sz3) { - result[i][j] = all_z; - } - } - - return result; -} - /*! Deallocator for 3D arrays */ template < class type > void delArr3(type *** arr, int dummyx, int dummyy) { delete[]arr[0][0]; @@ -141,24 +90,6 @@ template < class type > type ** _new_2_array(int sz1, int sz2) { } -/*! The assignment for 2D array */ -template < class type > type ** _assign_2_array(int sz1, int sz2, type ** org) { - - type **all_x; - type *all_y; - - all_x = org; - all_y = org[0]; - - type **result = org; - - for (int i = 0; i < sz1; i++, all_y += sz2) { - result[i] = all_y; - } - - return result; -} - /*! 
Deallocator for 2D arrays */ template < class type > void delArr2(type ** arr, int dummyx) { delete[]arr[0]; @@ -169,8 +100,4 @@ template < class type > void delArr2(type ** arr, int dummyx) { #define newArr3(type,sz1,sz2,sz3) _new_3_array((sz1),(sz2),(sz3)) #define newArr2(type,sz1,sz2) _new_2_array((sz1),(sz2)) -#define asgArr2(type,sz1,sz2,org) _assign_2_array((sz1),(sz2),(org)) -#define asgArr3(type,sz1,sz2,sz3,org) _assign_3_array((sz1),(sz2),(sz3),(org)) -#define asgArr4(type,sz1,sz2,sz3,sz4,org) _assign_4_array((sz1),(sz2),(sz3),(sz4),(org)) - #endif diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 397a1c62..9b475115 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -316,18 +316,18 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; } double start_mover_PC = MPI_Wtime(); - double ***Ex = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getEx()); - double ***Ey = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getEy()); - double ***Ez = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getEz()); - double ***Bx = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getBx()); - double ***By = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getBy()); - double ***Bz = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getBz()); - - double ***Bx_ext = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getBx_ext()); - double ***By_ext = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getBy_ext()); - double ***Bz_ext = asgArr3(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), EMf->getBz_ext()); - - double ****node_coordinate = asgArr4(double, grid->getNXN(), grid->getNYN(), grid->getNZN(), 3, grid->getN()); + double ***Ex = EMf->getEx(); + 
double ***Ey = EMf->getEy(); + double ***Ez = EMf->getEz(); + double ***Bx = EMf->getBx(); + double ***By = EMf->getBy(); + double ***Bz = EMf->getBz(); + + double ***Bx_ext = EMf->getBx_ext(); + double ***By_ext = EMf->getBy_ext(); + double ***Bz_ext = EMf->getBz_ext(); + + double ****node_coordinate = grid->getN(); const double dto2 = .5 * dt, qomdt2 = qom * dto2 / c; const double inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; From e8982ee1c62b10eb559d8bd8ac30576cc6f71752 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 23 Jul 2013 07:10:13 +0200 Subject: [PATCH 018/118] issue#31: use stdout rather than stderr in debug.h --- include/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/debug.h b/include/debug.h index 6532f599..402d3819 100644 --- a/include/debug.h +++ b/include/debug.h @@ -11,7 +11,7 @@ void dfprintf_fileLine(FILE * fptr, const char *func, const char *file, int line_number, const char *format, ...); -#define dprintf(args...) dfprintf_fileLine(stderr, __func__, __FILE__, __LINE__,## args) +#define dprintf(args...) 
dfprintf_fileLine(stdout, __func__, __FILE__, __LINE__,## args) #define dprint(var) dprintvar_fileLine(__func__,__FILE__,__LINE__,#var,var); #define dprint0(var) dprint(var) #define declare_dprintvar_fileLine(type) \ From ed7accfb30a9ce4447490d2ac996d028b3836afa Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 23 Jul 2013 07:59:41 +0200 Subject: [PATCH 019/118] issue #40: replacing node_coordinate and center_coordinate arrays --- grids/Grid3DCU.cpp | 46 ++++++++++++++++++--------------------- include/Grid3DCU.h | 40 ++++++++++++++++++++++++++-------- particles/Particles3D.cpp | 14 +++++------- 3 files changed, 58 insertions(+), 42 deletions(-) diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index 973b20f8..b222e8d5 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -51,33 +51,29 @@ Grid3DCU::Grid3DCU(CollectiveIO * col, VirtualTopology3D * vct) { zEnd = zStart + (col->getLz() / (double) vct->getZLEN()); // arrays allocation: nodes ---> the first node has index 1, the last has index nxn-2! - node_coordinate = newArr4(double, nxn, nyn, nzn, 3); // 0 -> X, 1 -> Y, 2-> Z - for (int i = 0; i < nxn; i++) { - for (int j = 0; j < nyn; j++) { - for (int k = 0; k < nzn; k++) { - node_coordinate[i][j][k][0] = xStart + (i - 1) * dx; - node_coordinate[i][j][k][1] = yStart + (j - 1) * dy; - node_coordinate[i][j][k][2] = zStart + (k - 1) * dz; - } - } - } + node_xcoord = new double[nxn]; + node_ycoord = new double[nyn]; + node_zcoord = new double[nzn]; + for (int i=0; i the first cell has index 1, the last has index ncn-2! 
- center_coordinate = newArr4(double, nxc, nyc, nzc, 3); - for (int i = 0; i < nxc; i++) { - for (int j = 0; j < nyc; j++) { - for (int k = 0; k < nzc; k++) { - center_coordinate[i][j][k][0] = .5 * (node_coordinate[i][j][k][0] + node_coordinate[i + 1][j][k][0]); - center_coordinate[i][j][k][1] = .5 * (node_coordinate[i][j][k][1] + node_coordinate[i][j + 1][k][1]); - center_coordinate[i][j][k][2] = .5 * (node_coordinate[i][j][k][2] + node_coordinate[i][j][k + 1][2]); - } - } - } + center_xcoord = new double[nxc]; + center_ycoord = new double[nyc]; + center_zcoord = new double[nzc]; + for(int i=0; igetCoordinates(0) << "," << ptVCT->getCoordinates(1) << "," << ptVCT->getCoordinates(2) << ")" << endl; cout << "Number of cell: -X=" << nxc - 2 << " -Y=" << nyc - 2 << " -Z=" << nzc - 2 << endl; - cout << "Xin = " << node_coordinate[1][1][1][0] << "; Xfin = " << node_coordinate[nxn - 2][1][1][0] << endl; - cout << "Yin = " << node_coordinate[1][1][1][1] << "; Yfin = " << node_coordinate[1][nyn - 2][1][1] << endl; - cout << "Zin = " << node_coordinate[1][1][1][2] << "; Zfin = " << node_coordinate[1][1][nzn - 2][2] << endl; + cout << "Xin = " << node_xcoord[1] << "; Xfin = " << node_xcoord[nxn - 2] << endl; + cout << "Yin = " << node_ycoord[1] << "; Yfin = " << node_ycoord[nyn - 2] << endl; + cout << "Zin = " << node_zcoord[1] << "; Zfin = " << node_zcoord[nzn - 2] << endl; cout << endl; } diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index f085ea34..9d2e2f01 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -110,9 +110,13 @@ class Grid3DCU // :public Grid /** invol = inverse of volume*/ double invVOL; /** node coordinate */ - double ****node_coordinate; + double *node_xcoord; + double *node_ycoord; + double *node_zcoord; /** center coordinate */ - double ****center_coordinate; + double *center_xcoord; + double *center_ycoord; + double *center_zcoord; /** local grid boundaries coordinate */ double xStart, xEnd, yStart, yEnd, zStart, zEnd; @@ -126,13 +130,31 
@@ class Grid3DCU // :public Grid double getDX() { return (dx); } double getDY() { return (dy); } double getDZ() { return (dz); } - double &getXN(int X, int Y, int Z) { return (node_coordinate[X][Y][Z][0]); } - double &getYN(int X, int Y, int Z) { return (node_coordinate[X][Y][Z][1]); } - double &getZN(int X, int Y, int Z) { return (node_coordinate[X][Y][Z][2]); } - double &getXC(int X, int Y, int Z) { return (center_coordinate[X][Y][Z][0]); } - double &getYC(int X, int Y, int Z) { return (center_coordinate[X][Y][Z][1]); } - double &getZC(int X, int Y, int Z) { return (center_coordinate[X][Y][Z][2]); } - double ****getN() { return node_coordinate; } + // + // coordinate accessors + // + // calculated equivalents (preferred for accelerator?): + //const double &calcXN(int X) { return xStart+(X-1)*dx;} + //const double &calcYN(int Y) { return yStart+(Y-1)*dy;} + //const double &calcZN(int Z) { return zStart+(Z-1)*dz;} + const double &getXN(int X) { return node_xcoord[X];} + const double &getYN(int Y) { return node_ycoord[Y];} + const double &getZN(int Z) { return node_zcoord[Z];} + const double &getXC(int X) { return center_xcoord[X];} + const double &getYC(int Y) { return center_ycoord[Y];} + const double &getZC(int Z) { return center_zcoord[Z];} + // + // The following could be eliminated in favor of the previous + // unless we truly anticipate generalizing to a deformed + // logically cartesian mesh. See issue #40. 
+ // + const double &getXN(int X, int Y, int Z) { return node_xcoord[X];} + const double &getYN(int X, int Y, int Z) { return node_ycoord[Y];} + const double &getZN(int X, int Y, int Z) { return node_zcoord[Z];} + const double &getXC(int X, int Y, int Z) { return center_xcoord[X];} + const double &getYC(int X, int Y, int Z) { return center_ycoord[Y];} + const double &getZC(int X, int Y, int Z) { return center_zcoord[Z];} + // double getXstart() { return (xStart); } double getXend() { return (xEnd); } double getYstart() { return (yStart); } diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 9b475115..2be55cfb 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -327,8 +327,6 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { double ***By_ext = EMf->getBy_ext(); double ***Bz_ext = EMf->getBz_ext(); - double ****node_coordinate = grid->getN(); - const double dto2 = .5 * dt, qomdt2 = qom * dto2 / c; const double inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; assert_le(nop,INT_MAX); // else would need to use long long @@ -376,12 +374,12 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { double xi[2]; double eta[2]; double zeta[2]; - xi[0] = xp - node_coordinate[ix - 1][iy][iz][0]; - eta[0] = yp - node_coordinate[ix][iy - 1][iz][1]; - zeta[0] = zp - node_coordinate[ix][iy][iz - 1][2]; - xi[1] = node_coordinate[ix][iy][iz][0] - xp; - eta[1] = node_coordinate[ix][iy][iz][1] - yp; - zeta[1] = node_coordinate[ix][iy][iz][2] - zp; + xi[0] = xp - grid->getXN(ix-1); + eta[0] = yp - grid->getYN(iy-1); + zeta[0] = zp - grid->getZN(iz-1); + xi[1] = grid->getXN(ix) - xp; + eta[1] = grid->getYN(iy) - yp; + zeta[1] = grid->getZN(iz) - zp; double Exl = 0.0; double Eyl = 0.0; From 1c2ff247c80370659bc97c0e9b56544552477fac Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 23 Jul 2013 10:11:56 +0200 Subject: [PATCH 020/118] issue#41: interpP2G reimplemented as sumMoments in 
EMfields class --- fields/EMfields3D.cpp | 279 ++++++++++++++++++++++++++++++++-- fields/Moments.cpp | 4 +- include/EMfields3D.h | 2 + include/Moments.h | 104 +++++-------- include/Particles3D.h | 4 +- include/Particles3Dcomm.h | 10 +- main/iPic3Dlib.cpp | 4 +- particles/Particles3D.cpp | 142 ++++++++--------- particles/Particles3Dcomm.cpp | 29 ++-- 9 files changed, 407 insertions(+), 171 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index c6c5936d..7be94a18 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -1,6 +1,7 @@ #include #include "EMfields3D.h" +#include "Particles3Dcomm.h" #include "TimeTasks.h" #include "Moments.h" #include "ompdefs.h" @@ -169,8 +170,262 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) { for(int i=0;iinit(nxn,nyn,nzn,invVOL); - } + momentsArray[i]->init(nxn,nyn,nzn); + } +} + +// This was Particles3Dcomm::interpP2G() +void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct) +{ + const double inv_dx = 1.0 / dx; + const double inv_dy = 1.0 / dy; + const double inv_dz = 1.0 / dz; + const int nxn = grid->getNXN(); + const int nyn = grid->getNYN(); + const int nzn = grid->getNZN(); + const double xstart = grid->getXstart(); + const double ystart = grid->getYstart(); + const double zstart = grid->getZstart(); + double const*const x = pcls.getXall(); + double const*const y = pcls.getYall(); + double const*const z = pcls.getZall(); + double const*const u = pcls.getUall(); + double const*const v = pcls.getVall(); + double const*const w = pcls.getWall(); + double const*const q = pcls.getQall(); + // + const int is = pcls.get_ns(); + double* rhons1d = rhons[is][0][0]; + double* Jxs1d = Jxs [is][0][0]; + double* Jys1d = Jys [is][0][0]; + double* Jzs1d = Jzs [is][0][0]; + double* pXXsn1d = pXXsn[is][0][0]; + double* pXYsn1d = pXYsn[is][0][0]; + double* pXZsn1d = pXZsn[is][0][0]; + double* pYYsn1d = pYYsn[is][0][0]; + double* pYZsn1d = pYZsn[is][0][0]; + double* 
pZZsn1d = pZZsn[is][0][0]; + // + const long long nop_ll = pcls.getNOP(); + const int nop = pcls.getNOP(); + assert_le(nop_ll,INT_MAX); // else would need to use long long + // To make memory use scale to a large number of threads, we + // could first apply an efficient parallel sorting algorithm + // to the particles and then accumulate moments in smaller + // subarrays. + //#ifdef _OPENMP + #pragma omp parallel + { + int thread_num = omp_get_thread_num(); + Moments& speciesMoments = fetch_momentsArray(thread_num); + speciesMoments.set_to_zero(); + // + double*** rho = speciesMoments.fetch_rho(); + double*** Jx = speciesMoments.fetch_Jx(); + double*** Jy = speciesMoments.fetch_Jy(); + double*** Jz = speciesMoments.fetch_Jz(); + double*** Pxx = speciesMoments.fetch_Pxx(); + double*** Pxy = speciesMoments.fetch_Pxy(); + double*** Pxz = speciesMoments.fetch_Pxz(); + double*** Pyy = speciesMoments.fetch_Pyy(); + double*** Pyz = speciesMoments.fetch_Pyz(); + double*** Pzz = speciesMoments.fetch_Pzz(); + // The following loop is expensive, so it is wise to assume that the + // compiler is stupid. Therefore we should on the one hand + // expand things out and on the other hand avoid repeating computations. 
+ #pragma omp for + for (int i = 0; i < nop; i++) + { + // compute the quadratic moments of velocity + // + const double ui=u[i]; + const double vi=v[i]; + const double wi=w[i]; + const double uui=ui*ui; + const double uvi=ui*vi; + const double uwi=ui*wi; + const double vvi=vi*vi; + const double vwi=vi*wi; + const double wwi=wi*wi; + // + // compute the weights to distribute the moments + // + const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); + const int iy = 2 + int (floor((y[i] - ystart) * inv_dy)); + const int iz = 2 + int (floor((z[i] - zstart) * inv_dz)); + //const double xi0 = x[i] - grid->getXN(ix - 1, iy, iz); + //const double eta0 = y[i] - grid->getYN(ix, iy - 1, iz); + //const double zeta0 = z[i] - grid->getZN(ix, iy, iz - 1); + //const double xi1 = grid->getXN(ix, iy, iz) - x[i]; + //const double eta1 = grid->getYN(ix, iy, iz) - y[i]; + //const double zeta1 = grid->getZN(ix, iy, iz) - z[i]; + const double xi0 = x[i] - grid->getXN(ix-1); + const double eta0 = y[i] - grid->getYN(iy-1); + const double zeta0 = z[i] - grid->getZN(iz-1); + const double xi1 = grid->getXN(ix) - x[i]; + const double eta1 = grid->getYN(iy) - y[i]; + const double zeta1 = grid->getZN(iz) - z[i]; + const double qi = q[i]; + const double weight000 = qi * xi0 * eta0 * zeta0 * invVOL; + const double weight001 = qi * xi0 * eta0 * zeta1 * invVOL; + const double weight010 = qi * xi0 * eta1 * zeta0 * invVOL; + const double weight011 = qi * xi0 * eta1 * zeta1 * invVOL; + const double weight100 = qi * xi1 * eta0 * zeta0 * invVOL; + const double weight101 = qi * xi1 * eta0 * zeta1 * invVOL; + const double weight110 = qi * xi1 * eta1 * zeta0 * invVOL; + const double weight111 = qi * xi1 * eta1 * zeta1 * invVOL; + // + // use the weight to distribute the moments + // + // add charge density + //speciesMoments.addRho(weight, ix, iy, iz); + rho[ix ][iy ][iz ] += weight000; + rho[ix ][iy ][iz-1] += weight001; + rho[ix ][iy-1][iz ] += weight010; + rho[ix ][iy-1][iz-1] += weight011; + 
rho[ix-1][iy ][iz ] += weight100; + rho[ix-1][iy ][iz-1] += weight101; + rho[ix-1][iy-1][iz ] += weight110; + rho[ix-1][iy-1][iz-1] += weight111; + // add current density - X + //speciesMoments.addJx(temp, ix, iy, iz); + Jx[ix ][iy ][iz ] += ui*weight000; + Jx[ix ][iy ][iz-1] += ui*weight001; + Jx[ix ][iy-1][iz ] += ui*weight010; + Jx[ix ][iy-1][iz-1] += ui*weight011; + Jx[ix-1][iy ][iz ] += ui*weight100; + Jx[ix-1][iy ][iz-1] += ui*weight101; + Jx[ix-1][iy-1][iz ] += ui*weight110; + Jx[ix-1][iy-1][iz-1] += ui*weight111; + // add current density - Y + //speciesMoments.addJy(temp, ix, iy, iz); + Jy[ix ][iy ][iz ] += vi*weight000; + Jy[ix ][iy ][iz-1] += vi*weight001; + Jy[ix ][iy-1][iz ] += vi*weight010; + Jy[ix ][iy-1][iz-1] += vi*weight011; + Jy[ix-1][iy ][iz ] += vi*weight100; + Jy[ix-1][iy ][iz-1] += vi*weight101; + Jy[ix-1][iy-1][iz ] += vi*weight110; + Jy[ix-1][iy-1][iz-1] += vi*weight111; + // add current density - Z + //speciesMoments.addJz(temp, ix, iy, iz); + Jz[ix ][iy ][iz ] += wi*weight000; + Jz[ix ][iy ][iz-1] += wi*weight001; + Jz[ix ][iy-1][iz ] += wi*weight010; + Jz[ix ][iy-1][iz-1] += wi*weight011; + Jz[ix-1][iy ][iz ] += wi*weight100; + Jz[ix-1][iy ][iz-1] += wi*weight101; + Jz[ix-1][iy-1][iz ] += wi*weight110; + Jz[ix-1][iy-1][iz-1] += wi*weight111; + // Pxx - add pressure tensor + //speciesMoments.addPxx(temp, ix, iy, iz); + Pxx[ix ][iy ][iz ] += uui*weight000; + Pxx[ix ][iy ][iz-1] += uui*weight001; + Pxx[ix ][iy-1][iz ] += uui*weight010; + Pxx[ix ][iy-1][iz-1] += uui*weight011; + Pxx[ix-1][iy ][iz ] += uui*weight100; + Pxx[ix-1][iy ][iz-1] += uui*weight101; + Pxx[ix-1][iy-1][iz ] += uui*weight110; + Pxx[ix-1][iy-1][iz-1] += uui*weight111; + // Pxy - add pressure tensor + //speciesMoments.addPxy(temp, ix, iy, iz); + Pxy[ix ][iy ][iz ] += uvi*weight000; + Pxy[ix ][iy ][iz-1] += uvi*weight001; + Pxy[ix ][iy-1][iz ] += uvi*weight010; + Pxy[ix ][iy-1][iz-1] += uvi*weight011; + Pxy[ix-1][iy ][iz ] += uvi*weight100; + Pxy[ix-1][iy ][iz-1] += 
uvi*weight101; + Pxy[ix-1][iy-1][iz ] += uvi*weight110; + Pxy[ix-1][iy-1][iz-1] += uvi*weight111; + // Pxz - add pressure tensor + //speciesMoments.addPxz(temp, ix, iy, iz); + Pxz[ix ][iy ][iz ] += uwi*weight000; + Pxz[ix ][iy ][iz-1] += uwi*weight001; + Pxz[ix ][iy-1][iz ] += uwi*weight010; + Pxz[ix ][iy-1][iz-1] += uwi*weight011; + Pxz[ix-1][iy ][iz ] += uwi*weight100; + Pxz[ix-1][iy ][iz-1] += uwi*weight101; + Pxz[ix-1][iy-1][iz ] += uwi*weight110; + Pxz[ix-1][iy-1][iz-1] += uwi*weight111; + // Pyy - add pressure tensor + //speciesMoments.addPyy(temp, ix, iy, iz); + Pyy[ix ][iy ][iz ] += vvi*weight000; + Pyy[ix ][iy ][iz-1] += vvi*weight001; + Pyy[ix ][iy-1][iz ] += vvi*weight010; + Pyy[ix ][iy-1][iz-1] += vvi*weight011; + Pyy[ix-1][iy ][iz ] += vvi*weight100; + Pyy[ix-1][iy ][iz-1] += vvi*weight101; + Pyy[ix-1][iy-1][iz ] += vvi*weight110; + Pyy[ix-1][iy-1][iz-1] += vvi*weight111; + // Pyz - add pressure tensor + //speciesMoments.addPyz(temp, ix, iy, iz); + Pyz[ix ][iy ][iz ] += vwi*weight000; + Pyz[ix ][iy ][iz-1] += vwi*weight001; + Pyz[ix ][iy-1][iz ] += vwi*weight010; + Pyz[ix ][iy-1][iz-1] += vwi*weight011; + Pyz[ix-1][iy ][iz ] += vwi*weight100; + Pyz[ix-1][iy ][iz-1] += vwi*weight101; + Pyz[ix-1][iy-1][iz ] += vwi*weight110; + Pyz[ix-1][iy-1][iz-1] += vwi*weight111; + // Pzz - add pressure tensor + //speciesMoments.addPzz(temp, ix, iy, iz); + Pzz[ix ][iy ][iz ] += wwi*weight000; + Pzz[ix ][iy ][iz-1] += wwi*weight001; + Pzz[ix ][iy-1][iz ] += wwi*weight010; + Pzz[ix ][iy-1][iz-1] += wwi*weight011; + Pzz[ix-1][iy ][iz ] += wwi*weight100; + Pzz[ix-1][iy ][iz-1] += wwi*weight101; + Pzz[ix-1][iy-1][iz ] += wwi*weight110; + Pzz[ix-1][iy-1][iz-1] += wwi*weight111; + } + // The following way is an equivalent reduction but less + // efficient for a large number of threads. + // + //#pragma omp critical + //addToSpeciesMoments(speciesMoments,is); + // + // Instead we split up the reduction tasks. 
+ // + // One-dimensional array access is presumably + // more efficient on poor compilers. + // + const double*const rho1d = rho[0][0]; + const double*const Jx1d = Jx [0][0]; + const double*const Jy1d = Jy [0][0]; + const double*const Jz1d = Jz [0][0]; + const double*const Pxx1d = Pxx[0][0]; + const double*const Pxy1d = Pxy[0][0]; + const double*const Pxz1d = Pxz[0][0]; + const double*const Pyy1d = Pyy[0][0]; + const double*const Pyz1d = Pyz[0][0]; + const double*const Pzz1d = Pzz[0][0]; + // + assert_eq(speciesMoments.get_nx(), nxn); + assert_eq(speciesMoments.get_ny(), nyn); + assert_eq(speciesMoments.get_nz(), nzn); + const int numel = nxn*nyn*nzn; + #pragma omp critical + for(int i=0;i Particles interpolation */ void communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, VirtualTopology3D * vct); + void sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); /*! add accumulated moments to the moments for a given species */ void addToSpeciesMoments(const Moments & in, int is); /*! 
add an amount of charge density to charge density field at node X,Y,Z */ diff --git a/include/Moments.h b/include/Moments.h index d4349ecc..e90e01db 100644 --- a/include/Moments.h +++ b/include/Moments.h @@ -5,7 +5,6 @@ // class Moments { private: - double invVOL; double ***rho; /** current density, defined on nodes */ @@ -24,55 +23,38 @@ class Moments { int ny; int nz; public: - int get_nx() const { - return nx; - } - int get_ny() const { - return ny; - } - int get_nz() const { - return nz; - } - double get_invVOL() const { - return invVOL; - } - double get_rho(int i, int j, int k) const { - return rho[i][j][k]; - } - double get_Jx(int i, int j, int k) const { - return Jx[i][j][k]; - } - double get_Jy(int i, int j, int k) const { - return Jy[i][j][k]; - } - double get_Jz(int i, int j, int k) const { - return Jz[i][j][k]; - } - double get_pXX(int i, int j, int k) const { - return pXX[i][j][k]; - } - double get_pXY(int i, int j, int k) const { - return pXY[i][j][k]; - } - double get_pXZ(int i, int j, int k) const { - return pXZ[i][j][k]; - } - double get_pYY(int i, int j, int k) const { - return pYY[i][j][k]; - } - double get_pYZ(int i, int j, int k) const { - return pYZ[i][j][k]; - } - double get_pZZ(int i, int j, int k) const { - return pZZ[i][j][k]; - } + // get accessors (read access) + int get_nx() const { return nx; } + int get_ny() const { return ny; } + int get_nz() const { return nz; } + double get_rho(int i, int j, int k) const { return rho[i][j][k]; } + double get_Jx (int i, int j, int k) const { return Jx [i][j][k]; } + double get_Jy (int i, int j, int k) const { return Jy [i][j][k]; } + double get_Jz (int i, int j, int k) const { return Jz [i][j][k]; } + double get_pXX(int i, int j, int k) const { return pXX[i][j][k]; } + double get_pXY(int i, int j, int k) const { return pXY[i][j][k]; } + double get_pXZ(int i, int j, int k) const { return pXZ[i][j][k]; } + double get_pYY(int i, int j, int k) const { return pYY[i][j][k]; } + double get_pYZ(int i, int j, 
int k) const { return pYZ[i][j][k]; } + double get_pZZ(int i, int j, int k) const { return pZZ[i][j][k]; } + // fetch accessors (write access) + double*** fetch_rho() { return rho; } + double*** fetch_Jx () { return Jx ; } + double*** fetch_Jy () { return Jy ; } + double*** fetch_Jz () { return Jz ; } + double*** fetch_Pxx() { return pXX; } + double*** fetch_Pxy() { return pXY; } + double*** fetch_Pxz() { return pXZ; } + double*** fetch_Pyy() { return pYY; } + double*** fetch_Pyz() { return pYZ; } + double*** fetch_Pzz() { return pZZ; } public: Moments() { }; - Moments(int nx_, int ny_, int nz_, double invVOL_){ - init(nx_,ny_,nz_,invVOL_); + Moments(int nx_, int ny_, int nz_){ + init(nx_,ny_,nz_); } - void init(int nx_, int ny_, int nz_, double invVOL_); + void init(int nx_, int ny_, int nz_); ~Moments(); void set_to_zero(); void addRho(double weight[][2][2], int X, int Y, int Z); @@ -93,8 +75,7 @@ inline void Moments::addRho(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++) { - const double temp = weight[i][j][k] * invVOL; - rho[X - i][Y - j][Z - k] += temp; + rho[X - i][Y - j][Z - k] += weight[i][j][k]; } } /** add an amount of charge density to current density - direction X to current density field on the node*/ @@ -102,8 +83,7 @@ inline void Moments::addJx(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - Jx[X - i][Y - j][Z - k] += temp; + Jx[X - i][Y - j][Z - k] += weight[i][j][k]; } } /** add an amount of current density - direction Y to current density field on the node */ @@ -111,8 +91,7 @@ inline void Moments::addJy(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - Jy[X - i][Y - j][Z - k] += temp; + Jy[X - i][Y - j][Z - 
k] += weight[i][j][k]; } } /** add an amount of current density - direction Z to current density field on the node */ @@ -120,8 +99,7 @@ inline void Moments::addJz(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - Jz[X - i][Y - j][Z - k] += temp; + Jz[X - i][Y - j][Z - k] += weight[i][j][k]; } } /** add an amount of pressure density - direction XX to current density field on the node */ @@ -129,8 +107,7 @@ inline void Moments::addPxx(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - pXX[X - i][Y - j][Z - k] += temp; + pXX[X - i][Y - j][Z - k] += weight[i][j][k]; } } /** add an amount of pressure density - direction XY to current density field on the node*/ @@ -138,8 +115,7 @@ inline void Moments::addPxy(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - pXY[X - i][Y - j][Z - k] += temp; + pXY[X - i][Y - j][Z - k] += weight[i][j][k]; } } /** add an amount of pressure density - direction XZ to current density field on the node */ @@ -147,8 +123,7 @@ inline void Moments::addPxz(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - pXZ[X - i][Y - j][Z - k] += temp; + pXZ[X - i][Y - j][Z - k] += weight[i][j][k]; } } /** add an amount of pressure density - direction YY to current density field on the node*/ @@ -156,8 +131,7 @@ inline void Moments::addPyy(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - pYY[X - i][Y - j][Z - k] += 
temp; + pYY[X - i][Y - j][Z - k] += weight[i][j][k]; } } /** add an amount of pressure density - direction YZ to current density field on the node */ @@ -165,8 +139,7 @@ inline void Moments::addPyz(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - pYZ[X - i][Y - j][Z - k] += temp; + pYZ[X - i][Y - j][Z - k] += weight[i][j][k]; } } /** add an amount of pressure density - direction ZZ to current density field on the node */ @@ -174,8 +147,7 @@ inline void Moments::addPzz(double weight[][2][2], int X, int Y, int Z) { for (int i = 0; i < 2; i++) for (int j = 0; j < 2; j++) for (int k = 0; k < 2; k++){ - const double temp = weight[i][j][k] * invVOL; - pZZ[X - i][Y - j][Z - k] += temp; + pZZ[X - i][Y - j][Z - k] += weight[i][j][k]; } } diff --git a/include/Particles3D.h b/include/Particles3D.h index 2c178918..fd89edd0 100644 --- a/include/Particles3D.h +++ b/include/Particles3D.h @@ -63,9 +63,9 @@ class Particles3D:public Particles3Dcomm { /** particle repopulator */ int particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* EMf); /** interpolation Particle->Grid only charge density, current */ - void interpP2G_notP(Field * EMf, Grid * grid, VirtualTopology3D * vct); + //void interpP2G_notP(Field * EMf, Grid * grid, VirtualTopology3D * vct); /** interpolation Particle->Grid only for pressure tensor */ - void interpP2G_onlyP(Field * EMf, Grid * grid, VirtualTopology3D * vct); + //void interpP2G_onlyP(Field * EMf, Grid * grid, VirtualTopology3D * vct); /*! 
Delete the particles inside the sphere with radius R and center x_center y_center and return the total charge removed */ double deleteParticlesInsideSphere(double R, double x_center, double y_center, double z_center); diff --git a/include/Particles3Dcomm.h b/include/Particles3Dcomm.h index d69cf069..29600957 100644 --- a/include/Particles3Dcomm.h +++ b/include/Particles3Dcomm.h @@ -10,7 +10,7 @@ developers: Stefano Markidis, Giovanni Lapenta #include "Particles.h" /** * - * Abstract class for particles of the same species, in a 2D space and 3component velocity with communications methods + * class for particles of the same species with communications methods * @date Fri Jun 4 2007 * @author Stefano Markidis, Giovanni Lapenta * @version 2.0 @@ -26,7 +26,7 @@ class Particles3Dcomm:public Particles { void allocate(int species, CollectiveIO * col, VirtualTopology3D * vct, Grid * grid); /** calculate the weights given the position of particles */ - void calculateWeights(double weight[][2][2], double xp, double yp, double zp, int ix, int iy, int iz, Grid * grid); + //void calculateWeights(double weight[][2][2], double xp, double yp, double zp, int ix, int iy, int iz, Grid * grid); /** interpolation method GRID->PARTICLE order 1: CIC */ void interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vct); /** method for communicating exiting particles to X-RIGHT, X-LEFT, Y-RIGHT, Y-LEFT, Z-RIGHT, Z-LEFT processes */ @@ -104,8 +104,12 @@ class Particles3Dcomm:public Particles { /** Print the number of particles of this subdomain */ void PrintNp(VirtualTopology3D * ptVCT) const; +public: + // accessors + int get_ns()const{return ns;} + protected: - /** number of species */ + /** number of this species */ int ns; /** maximum number of particles of this species on this domain. 
used for memory allocation */ long long npmax; diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 418fc8ce..852817b5 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -175,7 +175,9 @@ void c_Solver::CalculateField() { for (int i = 0; i < ns; i++) { - part[i].interpP2G(EMf, grid, vct); // interpolate Particles to Grid(Nodes) + // interpolate particles to grid nodes + EMf->sumMoments(part[i], grid, vct); + //part[i].interpP2G(EMf, grid, vct); } EMf->sumOverSpecies(vct); // sum all over the species diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 2be55cfb..6b55f301 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -907,78 +907,78 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* } /** interpolation Particle->Grid only for pressure tensor */ -void Particles3D::interpP2G_onlyP(Field * EMf, Grid * grid, VirtualTopology3D * vct) { - double weight[2][2][2]; - double temp[2][2][2]; - int ix, iy, iz, temp1, temp2, temp3; - for (register long long i = 0; i < nop; i++) { - ix = 2 + int (floor((x[i] - grid->getXstart()) / grid->getDX())); - iy = 2 + int (floor((y[i] - grid->getYstart()) / grid->getDY())); - iz = 2 + int (floor((z[i] - grid->getZstart()) / grid->getDZ())); - calculateWeights(weight, x[i], y[i], z[i], ix, iy, iz, grid); - scale(weight, q[i], 2, 2, 2); - // Pxx - eqValue(0.0, temp, 2, 2, 2); - addscale(u[i] * u[i], temp, weight, 2, 2, 2); - EMf->addPxx(temp, ix, iy, iz, ns); - // Pxy - eqValue(0.0, temp, 2, 2, 2); - addscale(u[i] * v[i], temp, weight, 2, 2, 2); - EMf->addPxy(temp, ix, iy, iz, ns); - // Pxz - eqValue(0.0, temp, 2, 2, 2); - addscale(u[i] * w[i], temp, weight, 2, 2, 2); - EMf->addPxz(temp, ix, iy, iz, ns); - // Pyy - eqValue(0.0, temp, 2, 2, 2); - addscale(v[i] * v[i], temp, weight, 2, 2, 2); - EMf->addPyy(temp, ix, iy, iz, ns); - // Pyz - eqValue(0.0, temp, 2, 2, 2); - addscale(v[i] * w[i], temp, weight, 2, 2, 2); - EMf->addPyz(temp, ix, iy, iz, ns); - 
// Pzz - eqValue(0.0, temp, 2, 2, 2); - addscale(w[i] * w[i], temp, weight, 2, 2, 2); - EMf->addPzz(temp, ix, iy, iz, ns); - } -} +//void Particles3D::interpP2G_onlyP(Field * EMf, Grid * grid, VirtualTopology3D * vct) { +// double weight[2][2][2]; +// double temp[2][2][2]; +// int ix, iy, iz, temp1, temp2, temp3; +// for (register long long i = 0; i < nop; i++) { +// ix = 2 + int (floor((x[i] - grid->getXstart()) / grid->getDX())); +// iy = 2 + int (floor((y[i] - grid->getYstart()) / grid->getDY())); +// iz = 2 + int (floor((z[i] - grid->getZstart()) / grid->getDZ())); +// calculateWeights(weight, x[i], y[i], z[i], ix, iy, iz, grid); +// scale(weight, q[i], 2, 2, 2); +// // Pxx +// eqValue(0.0, temp, 2, 2, 2); +// addscale(u[i] * u[i], temp, weight, 2, 2, 2); +// EMf->addPxx(temp, ix, iy, iz, ns); +// // Pxy +// eqValue(0.0, temp, 2, 2, 2); +// addscale(u[i] * v[i], temp, weight, 2, 2, 2); +// EMf->addPxy(temp, ix, iy, iz, ns); +// // Pxz +// eqValue(0.0, temp, 2, 2, 2); +// addscale(u[i] * w[i], temp, weight, 2, 2, 2); +// EMf->addPxz(temp, ix, iy, iz, ns); +// // Pyy +// eqValue(0.0, temp, 2, 2, 2); +// addscale(v[i] * v[i], temp, weight, 2, 2, 2); +// EMf->addPyy(temp, ix, iy, iz, ns); +// // Pyz +// eqValue(0.0, temp, 2, 2, 2); +// addscale(v[i] * w[i], temp, weight, 2, 2, 2); +// EMf->addPyz(temp, ix, iy, iz, ns); +// // Pzz +// eqValue(0.0, temp, 2, 2, 2); +// addscale(w[i] * w[i], temp, weight, 2, 2, 2); +// EMf->addPzz(temp, ix, iy, iz, ns); +// } +//} /** interpolation Particle->Grid only charge density, current */ -void Particles3D::interpP2G_notP(Field * EMf, Grid * grid, VirtualTopology3D * vct) { - double weight[2][2][2]; - double temp[2][2][2]; - int ix, iy, iz, temp2, temp1, temp3; - for (register long long i = 0; i < nop; i++) { - ix = 2 + int (floor((x[i] - grid->getXstart()) / grid->getDX())); - iy = 2 + int (floor((y[i] - grid->getYstart()) / grid->getDY())); - iz = 2 + int (floor((z[i] - grid->getZstart()) / grid->getDZ())); - temp1 = (int) 
min(ix, nxn - 2); - temp2 = (int) min(iy, nyn - 2); - temp3 = (int) min(iz, nzn - 2); - ix = (int) max(temp1, 2); - iy = (int) max(temp2, 2); - iz = (int) max(temp3, 2); - calculateWeights(weight, x[i], y[i], z[i], ix, iy, iz, grid); - scale(weight, q[i], 2, 2, 2); - // rho - EMf->addRho(weight, ix, iy, iz, ns); - // Jx - eqValue(0.0, temp, 2, 2, 2); - addscale(u[i], temp, weight, 2, 2, 2); - EMf->addJx(temp, ix, iy, iz, ns); - // Jy - eqValue(0.0, temp, 2, 2, 2); - addscale(v[i], temp, weight, 2, 2, 2); - EMf->addJy(temp, ix, iy, iz, ns); - // Jz - eqValue(0.0, temp, 2, 2, 2); - addscale(w[i], temp, weight, 2, 2, 2); - EMf->addJz(temp, ix, iy, iz, ns); - - } - // communicate contribution from ghost cells - EMf->communicateGhostP2G(ns, 0, 0, 0, 0, vct); -} +//void Particles3D::interpP2G_notP(Field * EMf, Grid * grid, VirtualTopology3D * vct) { +// double weight[2][2][2]; +// double temp[2][2][2]; +// int ix, iy, iz, temp2, temp1, temp3; +// for (register long long i = 0; i < nop; i++) { +// ix = 2 + int (floor((x[i] - grid->getXstart()) / grid->getDX())); +// iy = 2 + int (floor((y[i] - grid->getYstart()) / grid->getDY())); +// iz = 2 + int (floor((z[i] - grid->getZstart()) / grid->getDZ())); +// temp1 = (int) min(ix, nxn - 2); +// temp2 = (int) min(iy, nyn - 2); +// temp3 = (int) min(iz, nzn - 2); +// ix = (int) max(temp1, 2); +// iy = (int) max(temp2, 2); +// iz = (int) max(temp3, 2); +// calculateWeights(weight, x[i], y[i], z[i], ix, iy, iz, grid); +// scale(weight, q[i], 2, 2, 2); +// // rho +// EMf->addRho(weight, ix, iy, iz, ns); +// // Jx +// eqValue(0.0, temp, 2, 2, 2); +// addscale(u[i], temp, weight, 2, 2, 2); +// EMf->addJx(temp, ix, iy, iz, ns); +// // Jy +// eqValue(0.0, temp, 2, 2, 2); +// addscale(v[i], temp, weight, 2, 2, 2); +// EMf->addJy(temp, ix, iy, iz, ns); +// // Jz +// eqValue(0.0, temp, 2, 2, 2); +// addscale(w[i], temp, weight, 2, 2, 2); +// EMf->addJz(temp, ix, iy, iz, ns); +// +// } +// // communicate contribution from ghost cells +// 
EMf->communicateGhostP2G(ns, 0, 0, 0, 0, vct); +//} /** apply a linear perturbation to particle distribution */ void Particles3D::linear_perturbation(double deltaBoB, double kx, double ky, double angle, double omega_r, double omega_i, double Ex_mod, double Ex_phase, double Ey_mod, double Ey_phase, double Ez_mod, double Ez_phase, double Bx_mod, double Bx_phase, double By_mod, double By_phase, double Bz_mod, double Bz_phase, Grid * grid, Field * EMf, VirtualTopology3D * vct) { diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index d339894d..82ad7b23 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -282,22 +282,23 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 } /** calculate the weights given the position of particles 0,0,0 is the left,left, left node */ -void Particles3Dcomm::calculateWeights(double weight[][2][2], double xp, double yp, double zp, int ix, int iy, int iz, Grid * grid) { - double xi[2], eta[2], zeta[2]; - xi[0] = xp - grid->getXN(ix - 1, iy, iz); - eta[0] = yp - grid->getYN(ix, iy - 1, iz); - zeta[0] = zp - grid->getZN(ix, iy, iz - 1); - xi[1] = grid->getXN(ix, iy, iz) - xp; - eta[1] = grid->getYN(ix, iy, iz) - yp; - zeta[1] = grid->getZN(ix, iy, iz) - zp; - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) - weight[i][j][k] = xi[i] * eta[j] * zeta[k] * invVOL; -} +//void Particles3Dcomm::calculateWeights(double weight[][2][2], double xp, double yp, double zp, int ix, int iy, int iz, Grid * grid) { +// double xi[2], eta[2], zeta[2]; +// xi[0] = xp - grid->getXN(ix - 1, iy, iz); +// eta[0] = yp - grid->getYN(ix, iy - 1, iz); +// zeta[0] = zp - grid->getZN(ix, iy, iz - 1); +// xi[1] = grid->getXN(ix, iy, iz) - xp; +// eta[1] = grid->getYN(ix, iy, iz) - yp; +// zeta[1] = grid->getZN(ix, iy, iz) - zp; +// for (int i = 0; i < 2; i++) +// for (int j = 0; j < 2; j++) +// for (int k = 0; k < 2; k++) +// weight[i][j][k] = xi[i] 
* eta[j] * zeta[k] * invVOL; +//} -/** Interpolation Particle --> Grid */ +// move this to EMfields3D class +// void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vct) { const double inv_dx = 1.0 / dx; const double inv_dy = 1.0 / dy; From 8187fbdbc45bf57ffc396d17980c103a04823874 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 23 Jul 2013 10:20:43 +0200 Subject: [PATCH 021/118] remove code commented in previous commit (iss #41) --- particles/Particles3D.cpp | 73 ----------------------------------- particles/Particles3Dcomm.cpp | 22 ----------- 2 files changed, 95 deletions(-) diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 6b55f301..cc83f3f0 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -906,79 +906,6 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* return(0); // exit succcesfully (hopefully) } -/** interpolation Particle->Grid only for pressure tensor */ -//void Particles3D::interpP2G_onlyP(Field * EMf, Grid * grid, VirtualTopology3D * vct) { -// double weight[2][2][2]; -// double temp[2][2][2]; -// int ix, iy, iz, temp1, temp2, temp3; -// for (register long long i = 0; i < nop; i++) { -// ix = 2 + int (floor((x[i] - grid->getXstart()) / grid->getDX())); -// iy = 2 + int (floor((y[i] - grid->getYstart()) / grid->getDY())); -// iz = 2 + int (floor((z[i] - grid->getZstart()) / grid->getDZ())); -// calculateWeights(weight, x[i], y[i], z[i], ix, iy, iz, grid); -// scale(weight, q[i], 2, 2, 2); -// // Pxx -// eqValue(0.0, temp, 2, 2, 2); -// addscale(u[i] * u[i], temp, weight, 2, 2, 2); -// EMf->addPxx(temp, ix, iy, iz, ns); -// // Pxy -// eqValue(0.0, temp, 2, 2, 2); -// addscale(u[i] * v[i], temp, weight, 2, 2, 2); -// EMf->addPxy(temp, ix, iy, iz, ns); -// // Pxz -// eqValue(0.0, temp, 2, 2, 2); -// addscale(u[i] * w[i], temp, weight, 2, 2, 2); -// EMf->addPxz(temp, ix, iy, iz, ns); -// // Pyy -// eqValue(0.0, temp, 2, 2, 2); -// addscale(v[i] * v[i], 
temp, weight, 2, 2, 2); -// EMf->addPyy(temp, ix, iy, iz, ns); -// // Pyz -// eqValue(0.0, temp, 2, 2, 2); -// addscale(v[i] * w[i], temp, weight, 2, 2, 2); -// EMf->addPyz(temp, ix, iy, iz, ns); -// // Pzz -// eqValue(0.0, temp, 2, 2, 2); -// addscale(w[i] * w[i], temp, weight, 2, 2, 2); -// EMf->addPzz(temp, ix, iy, iz, ns); -// } -//} -/** interpolation Particle->Grid only charge density, current */ -//void Particles3D::interpP2G_notP(Field * EMf, Grid * grid, VirtualTopology3D * vct) { -// double weight[2][2][2]; -// double temp[2][2][2]; -// int ix, iy, iz, temp2, temp1, temp3; -// for (register long long i = 0; i < nop; i++) { -// ix = 2 + int (floor((x[i] - grid->getXstart()) / grid->getDX())); -// iy = 2 + int (floor((y[i] - grid->getYstart()) / grid->getDY())); -// iz = 2 + int (floor((z[i] - grid->getZstart()) / grid->getDZ())); -// temp1 = (int) min(ix, nxn - 2); -// temp2 = (int) min(iy, nyn - 2); -// temp3 = (int) min(iz, nzn - 2); -// ix = (int) max(temp1, 2); -// iy = (int) max(temp2, 2); -// iz = (int) max(temp3, 2); -// calculateWeights(weight, x[i], y[i], z[i], ix, iy, iz, grid); -// scale(weight, q[i], 2, 2, 2); -// // rho -// EMf->addRho(weight, ix, iy, iz, ns); -// // Jx -// eqValue(0.0, temp, 2, 2, 2); -// addscale(u[i], temp, weight, 2, 2, 2); -// EMf->addJx(temp, ix, iy, iz, ns); -// // Jy -// eqValue(0.0, temp, 2, 2, 2); -// addscale(v[i], temp, weight, 2, 2, 2); -// EMf->addJy(temp, ix, iy, iz, ns); -// // Jz -// eqValue(0.0, temp, 2, 2, 2); -// addscale(w[i], temp, weight, 2, 2, 2); -// EMf->addJz(temp, ix, iy, iz, ns); -// -// } -// // communicate contribution from ghost cells -// EMf->communicateGhostP2G(ns, 0, 0, 0, 0, vct); -//} /** apply a linear perturbation to particle distribution */ void Particles3D::linear_perturbation(double deltaBoB, double kx, double ky, double angle, double omega_r, double omega_i, double Ex_mod, double Ex_phase, double Ey_mod, double Ey_phase, double Ez_mod, double Ez_phase, double Bx_mod, double Bx_phase, 
double By_mod, double By_phase, double Bz_mod, double Bz_phase, Grid * grid, Field * EMf, VirtualTopology3D * vct) { diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 82ad7b23..2867068f 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -281,20 +281,6 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 } } -/** calculate the weights given the position of particles 0,0,0 is the left,left, left node */ -//void Particles3Dcomm::calculateWeights(double weight[][2][2], double xp, double yp, double zp, int ix, int iy, int iz, Grid * grid) { -// double xi[2], eta[2], zeta[2]; -// xi[0] = xp - grid->getXN(ix - 1, iy, iz); -// eta[0] = yp - grid->getYN(ix, iy - 1, iz); -// zeta[0] = zp - grid->getZN(ix, iy, iz - 1); -// xi[1] = grid->getXN(ix, iy, iz) - xp; -// eta[1] = grid->getYN(ix, iy, iz) - yp; -// zeta[1] = grid->getZN(ix, iy, iz) - zp; -// for (int i = 0; i < 2; i++) -// for (int j = 0; j < 2; j++) -// for (int k = 0; k < 2; k++) -// weight[i][j][k] = xi[i] * eta[j] * zeta[k] * invVOL; -//} // move this to EMfields3D class @@ -338,14 +324,6 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc for (int kk = 0; kk < 2; kk++) { weight[ii][jj][kk] = q[i] * xi[ii] * eta[jj] * zeta[kk] * invVOL; } - //weight[0][0][0] = q[i] * xi[0] * eta[0] * zeta[0] * invVOL; - //weight[0][0][1] = q[i] * xi[0] * eta[0] * zeta[1] * invVOL; - //weight[0][1][0] = q[i] * xi[0] * eta[1] * zeta[0] * invVOL; - //weight[0][1][1] = q[i] * xi[0] * eta[1] * zeta[1] * invVOL; - //weight[1][0][0] = q[i] * xi[1] * eta[0] * zeta[0] * invVOL; - //weight[1][0][1] = q[i] * xi[1] * eta[0] * zeta[1] * invVOL; - //weight[1][1][0] = q[i] * xi[1] * eta[1] * zeta[0] * invVOL; - //weight[1][1][1] = q[i] * xi[1] * eta[1] * zeta[1] * invVOL; // add charge density speciesMoments.addRho(weight, ix, iy, iz); // add current density - X From c3b88b1c27d892e58970ca3ae0115083f0ddf178 Mon Sep 17 
00:00:00 2001 From: eajohnson Date: Tue, 23 Jul 2013 10:27:07 +0200 Subject: [PATCH 022/118] removed unused code obsoleted under iss#41 --- include/Moments.h | 92 --------------------------- main/iPic3Dlib.cpp | 2 +- particles/Particles3Dcomm.cpp | 114 ++++------------------------------ 3 files changed, 13 insertions(+), 195 deletions(-) diff --git a/include/Moments.h b/include/Moments.h index e90e01db..cb0018aa 100644 --- a/include/Moments.h +++ b/include/Moments.h @@ -57,98 +57,6 @@ class Moments { void init(int nx_, int ny_, int nz_); ~Moments(); void set_to_zero(); - void addRho(double weight[][2][2], int X, int Y, int Z); - void addJx(double weight[][2][2], int X, int Y, int Z); - void addJy(double weight[][2][2], int X, int Y, int Z); - void addJz(double weight[][2][2], int X, int Y, int Z); - - void addPxx(double weight[][2][2], int X, int Y, int Z); - void addPxy(double weight[][2][2], int X, int Y, int Z); - void addPxz(double weight[][2][2], int X, int Y, int Z); - void addPyy(double weight[][2][2], int X, int Y, int Z); - void addPyz(double weight[][2][2], int X, int Y, int Z); - void addPzz(double weight[][2][2], int X, int Y, int Z); }; -/** add an amount of charge density to charge density field at node X,Y */ -inline void Moments::addRho(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++) { - rho[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount of charge density to current density - direction X to current density field on the node*/ -inline void Moments::addJx(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - Jx[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount of current density - direction Y to current density field on the node */ -inline void Moments::addJy(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int 
j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - Jy[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount of current density - direction Z to current density field on the node */ -inline void Moments::addJz(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - Jz[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount of pressure density - direction XX to current density field on the node */ -inline void Moments::addPxx(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - pXX[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount of pressure density - direction XY to current density field on the node*/ -inline void Moments::addPxy(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - pXY[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount of pressure density - direction XZ to current density field on the node */ -inline void Moments::addPxz(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - pXZ[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount of pressure density - direction YY to current density field on the node*/ -inline void Moments::addPyy(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - pYY[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount of pressure density - direction YZ to current density field on the node */ -inline void Moments::addPyz(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - pYZ[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} -/** add an amount 
of pressure density - direction ZZ to current density field on the node */ -inline void Moments::addPzz(double weight[][2][2], int X, int Y, int Z) { - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 2; k++){ - pZZ[X - i][Y - j][Z - k] += weight[i][j][k]; - } -} - #endif diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 852817b5..b05ad584 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -177,7 +177,7 @@ void c_Solver::CalculateField() { { // interpolate particles to grid nodes EMf->sumMoments(part[i], grid, vct); - //part[i].interpP2G(EMf, grid, vct); + //part[i].interpP2G(EMf, grid, vct); // the old, slow way. } EMf->sumOverSpecies(vct); // sum all over the species diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 2867068f..3e5c1ede 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -283,7 +283,7 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 } -// move this to EMfields3D class +// A much faster version of this is at EMfields3D::sumMoments // void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vct) { const double inv_dx = 1.0 / dx; @@ -297,14 +297,8 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc // could first apply an efficient parallel sorting algorithm // to the particles and then accumulate moments in smaller // subarrays. 
- //#ifdef _OPENMP - #pragma omp parallel { - int thread_num = omp_get_thread_num(); - Moments& speciesMoments = EMf->fetch_momentsArray(thread_num); - //Moments speciesMoments(nxn,nyn,nzn,invVOL); - speciesMoments.set_to_zero(); - #pragma omp for + assert_le(nop,INT_MAX); // else would need to use long long for (int i = 0; i < nop; i++) { const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); @@ -325,147 +319,63 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc weight[ii][jj][kk] = q[i] * xi[ii] * eta[jj] * zeta[kk] * invVOL; } // add charge density - speciesMoments.addRho(weight, ix, iy, iz); + EMf->addRho(weight, ix, iy, iz, ns); // add current density - X for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = u[i] * weight[ii][jj][kk]; - speciesMoments.addJx(temp, ix, iy, iz); + EMf->addJx(temp, ix, iy, iz, ns); // add current density - Y for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = v[i] * weight[ii][jj][kk]; - speciesMoments.addJy(temp, ix, iy, iz); + EMf->addJy(temp, ix, iy, iz, ns); // add current density - Z for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = w[i] * weight[ii][jj][kk]; - speciesMoments.addJz(temp, ix, iy, iz); + EMf->addJz(temp, ix, iy, iz, ns); // Pxx - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = u[i] * u[i] * weight[ii][jj][kk]; - speciesMoments.addPxx(temp, ix, iy, iz); + EMf->addPxx(temp, ix, iy, iz, ns); // Pxy - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = u[i] * v[i] * weight[ii][jj][kk]; - speciesMoments.addPxy(temp, ix, iy, iz); + EMf->addPxy(temp, ix, iy, iz, ns); // Pxz - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj 
< 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = u[i] * w[i] * weight[ii][jj][kk]; - speciesMoments.addPxz(temp, ix, iy, iz); + EMf->addPxz(temp, ix, iy, iz, ns); // Pyy - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = v[i] * v[i] * weight[ii][jj][kk]; - speciesMoments.addPyy(temp, ix, iy, iz); + EMf->addPyy(temp, ix, iy, iz, ns); // Pyz - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = v[i] * w[i] * weight[ii][jj][kk]; - speciesMoments.addPyz(temp, ix, iy, iz); + EMf->addPyz(temp, ix, iy, iz, ns); // Pzz - add pressure tensor for (int ii = 0; ii < 2; ii++) for (int jj = 0; jj < 2; jj++) for (int kk = 0; kk < 2; kk++) temp[ii][jj][kk] = w[i] * w[i] * weight[ii][jj][kk]; - speciesMoments.addPzz(temp, ix, iy, iz); + EMf->addPzz(temp, ix, iy, iz, ns); } - // change this to allow more parallelization after implementing array class - #pragma omp critical - EMf->addToSpeciesMoments(speciesMoments,ns); } - //#else - //{ - // assert_le(nop,INT_MAX); // else would need to use long long - // for (int i = 0; i < nop; i++) - // { - // const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); - // const int iy = 2 + int (floor((y[i] - ystart) * inv_dy)); - // const int iz = 2 + int (floor((z[i] - zstart) * inv_dz)); - // double temp[2][2][2]; - // double xi[2], eta[2], zeta[2]; - // xi[0] = x[i] - grid->getXN(ix - 1, iy, iz); - // eta[0] = y[i] - grid->getYN(ix, iy - 1, iz); - // zeta[0] = z[i] - grid->getZN(ix, iy, iz - 1); - // xi[1] = grid->getXN(ix, iy, iz) - x[i]; - // eta[1] = grid->getYN(ix, iy, iz) - y[i]; - // zeta[1] = grid->getZN(ix, iy, iz) - z[i]; - // double weight[2][2][2]; - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) { - // weight[ii][jj][kk] = q[i] * xi[ii] * eta[jj] * zeta[kk] * invVOL; - // } - // // add charge density 
- // EMf->addRho(weight, ix, iy, iz, ns); - // // add current density - X - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = u[i] * weight[ii][jj][kk]; - // EMf->addJx(temp, ix, iy, iz, ns); - // // add current density - Y - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = v[i] * weight[ii][jj][kk]; - // EMf->addJy(temp, ix, iy, iz, ns); - // // add current density - Z - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = w[i] * weight[ii][jj][kk]; - // EMf->addJz(temp, ix, iy, iz, ns); - // // Pxx - add pressure tensor - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = u[i] * u[i] * weight[ii][jj][kk]; - // EMf->addPxx(temp, ix, iy, iz, ns); - // // Pxy - add pressure tensor - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = u[i] * v[i] * weight[ii][jj][kk]; - // EMf->addPxy(temp, ix, iy, iz, ns); - // // Pxz - add pressure tensor - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = u[i] * w[i] * weight[ii][jj][kk]; - // EMf->addPxz(temp, ix, iy, iz, ns); - // // Pyy - add pressure tensor - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = v[i] * v[i] * weight[ii][jj][kk]; - // EMf->addPyy(temp, ix, iy, iz, ns); - // // Pyz - add pressure tensor - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = v[i] * w[i] * weight[ii][jj][kk]; - // EMf->addPyz(temp, ix, iy, iz, ns); - // // Pzz - add pressure tensor - // for (int ii = 0; ii < 2; 
ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // temp[ii][jj][kk] = w[i] * w[i] * weight[ii][jj][kk]; - // EMf->addPzz(temp, ix, iy, iz, ns); - // } - //} - //#endif // communicate contribution from ghost cells EMf->communicateGhostP2G(ns, 0, 0, 0, 0, vct); } From 8556e6c69183e2e3db9f9c21b3611ea97b5ef5af Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 1 Aug 2013 14:23:28 +0200 Subject: [PATCH 023/118] issue #42: Support -fno-exceptions: replacing throw with eprintf --- ConfigFile/src/ConfigFile.cpp | 4 +- ConfigFile/src/ConfigFile.h | 4 +- PSKOutput3D/PSKhdf5adaptor.cpp | 300 ++++++++++++++++++--------------- include/ConfigFile.h | 4 +- include/PSKOutput.h | 41 +++-- 5 files changed, 199 insertions(+), 154 deletions(-) diff --git a/ConfigFile/src/ConfigFile.cpp b/ConfigFile/src/ConfigFile.cpp index 7f6f1f99..ed9f6b53 100644 --- a/ConfigFile/src/ConfigFile.cpp +++ b/ConfigFile/src/ConfigFile.cpp @@ -1,6 +1,7 @@ // ConfigFile.cpp #include "ConfigFile.h" +#include "errors.h" using std::string; @@ -11,7 +12,8 @@ ConfigFile::ConfigFile(string filename, string delimiter, string comment, string std::ifstream in(filename.c_str()); if (!in) - throw file_not_found(filename); + eprintf("file not found: %s", filename.c_str()); + //throw file_not_found(filename); in >> (*this); } diff --git a/ConfigFile/src/ConfigFile.h b/ConfigFile/src/ConfigFile.h index 4de95342..d8d8108a 100644 --- a/ConfigFile/src/ConfigFile.h +++ b/ConfigFile/src/ConfigFile.h @@ -48,6 +48,7 @@ #include #include #include +#include "errors.h" // for eprintf using std::string; @@ -175,7 +176,8 @@ template < class T > T ConfigFile::read(const string & key) const { // Read the value corresponding to key mapci p = myContents.find(key); if (p == myContents.end()) - throw key_not_found(key); + eprintf("key not found: %s", key.c_str()); + //throw key_not_found(key); return string_as_T < T > (p->second); } diff --git a/PSKOutput3D/PSKhdf5adaptor.cpp 
b/PSKOutput3D/PSKhdf5adaptor.cpp index 33c83115..8a4d2d14 100644 --- a/PSKOutput3D/PSKhdf5adaptor.cpp +++ b/PSKOutput3D/PSKhdf5adaptor.cpp @@ -1,5 +1,6 @@ #include +#include "errors.h" #include "PSKhdf5adaptor.h" using namespace PSK; @@ -19,8 +20,8 @@ void HDF5OutputAdaptor::get_dataset_context(const std::string & name, std::vecto hid_array.resize(ncompx); if (ncompx == 0) { - throw PSK::OutputException("HDF5OutputAdaptor::get_dataset_context()>> zero name components"); - + //throw PSK::OutputException("HDF5OutputAdaptor::get_dataset_context()>> zero name components"); + eprintf(">> zero name components"); } else if (ncompx == 1) { hid_array[0] = _hdf5_file_id; @@ -59,7 +60,8 @@ void HDF5OutputAdaptor::get_dataset_context(const std::string & name, std::vecto // std::cout << "group create failed \n" ; - throw PSK::OutputException("Failed to open/create group for <" + name + "> at element <" + name_components[i] + ">", "HDF5OutputAdaptor::get_dataset_context()"); + //throw PSK::OutputException("Failed to open/create group for <" + name + "> at element <" + name_components[i] + ">", "HDF5OutputAdaptor::get_dataset_context()"); + eprintf("Failed to open/create group for <%s> at element <%s>", name.c_str(), name_components[i].c_str()); } } @@ -78,7 +80,8 @@ void HDF5OutputAdaptor::get_dataset_context(const std::string & name, std::vecto */ std::string HDF5OutputAdaptor::purify_object_name(const std::string & objname) { if (objname.length() == 0) - throw PSK::OutputException("Zero length tag name", "HDF5OutputAdaptor::purify_object_name()"); + //throw PSK::OutputException("Zero length tag name", "HDF5OutputAdaptor::purify_object_name()"); + eprintf("Zero length tag name"); return objname[0] != '/' ? 
"/" + objname : objname; @@ -118,12 +121,13 @@ void HDF5OutputAdaptor::open(const std::string & name) { _hdf5_file_id = H5Fcreate(name.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); if (_hdf5_file_id <= 0) { - PSK::OutputException e("H5FCreate fails", "HDF5OutputAdaptor::open2()"); + eprintf("H5FCreate fails"); + //PSK::OutputException e("H5FCreate fails", "HDF5OutputAdaptor::open2()"); // if using H5F_ACC_EXCL // e.push( "Using H5F_ACC_EXCL: Check if file " +name + " already exists" ); - throw e; + //throw e; } _hdf5_file_name = name; @@ -155,9 +159,10 @@ void HDF5OutputAdaptor::open_append(const std::string & name) { } if (_hdf5_file_id <= 0) { - PSK::OutputException e("H5Fopen fails", "HDF5OutputAdaptor::open_append()"); + eprintf("H5Fopen fails"); + //PSK::OutputException e("H5Fopen fails", "HDF5OutputAdaptor::open_append()"); - throw e; + //throw e; } _hdf5_file_name = name; @@ -172,8 +177,9 @@ void HDF5OutputAdaptor::close(void) { herr_t hdf5err = H5Fclose(_hdf5_file_id); if (hdf5err < 0) { - PSK::OutputException e("HDF5OutputAdaptor::close()>> H5FClose fails"); - throw e; + eprintf("H5FClose fails"); + //PSK::OutputException e("HDF5OutputAdaptor::close()>> H5FClose fails"); + //throw e; } _hdf5_file_name.clear(); @@ -188,7 +194,7 @@ void HDF5OutputAdaptor::close(void) { * */ void HDF5OutputAdaptor::write(const std::string & tag, int i_value) { - try { + //try { std::string ptag = purify_object_name(tag); std::vector < hid_t > hid_array; @@ -213,8 +219,9 @@ void HDF5OutputAdaptor::write(const std::string & tag, int i_value) { } if (hdf5err < 0) { - PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(int)"); - throw e; + eprintf("make_dataset fails for %s", tag.c_str()); + //PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(int)"); + //throw e; } // close groups, if any, but don't try to close the file id at [0] @@ -223,14 +230,14 @@ void HDF5OutputAdaptor::write(const std::string & tag, int 
i_value) { delete[]hdf5dims; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(int)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(int)"); + // throw e; + //} } // new long writing void HDF5OutputAdaptor::write(const std::string & tag, long i_value) { - try { + //try { std::string ptag = purify_object_name(tag); std::vector < hid_t > hid_array; @@ -255,8 +262,9 @@ void HDF5OutputAdaptor::write(const std::string & tag, long i_value) { } if (hdf5err < 0) { - PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(long)"); - throw e; + eprintf("make_dataset fails for %s", tag.c_str()); + //PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(long)"); + //throw e; } // close groups, if any, but don't try to close the file id at [0] @@ -265,18 +273,19 @@ void HDF5OutputAdaptor::write(const std::string & tag, long i_value) { delete[]hdf5dims; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(long)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(long)"); + // throw e; + //} } // void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, const int *i_array) { - try { + //try { if (dimens.size() == 0) { - PSK::OutputException e("Zero Dimens size", "HDF5OutputAdaptor::write(int* array)"); - throw e; + eprintf("Zero Dimens size"); + //PSK::OutputException e("Zero Dimens size", "HDF5OutputAdaptor::write(int* array)"); + //throw e; } std::string ptag = purify_object_name(tag); @@ -304,25 +313,27 @@ void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, cons } if (hdf5err < 0) { - PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(int* array)"); - throw e; + eprintf("make_dataset fails for %s", tag.c_str()); + //PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(int* array)"); + //throw e; } // 
close groups, if any, but don't try to close the file id at [0] for (int i = hid_array.size() - 1; i > 0; --i) hdf5err = H5Gclose(hid_array[i]); - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(int* array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(int* array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, const long *i_array) { - try { + //try { if (dimens.size() == 0) { - PSK::OutputException e("Zero Dimens size", "HDF5OutputAdaptor::write(long* array)"); - throw e; + eprintf("Zero Dimens size"); + //PSK::OutputException e("Zero Dimens size", "HDF5OutputAdaptor::write(long* array)"); + //throw e; } std::string ptag = purify_object_name(tag); @@ -341,55 +352,57 @@ void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, cons dimens.size(), hdf5dims, i_array); if (hdf5err < 0) { - PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(long* array)"); - throw e; + eprintf("make_dataset fails for %s", tag.c_str()); + //PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(long* array)"); + //throw e; } // close groups, if any, but don't try to close the file id at [0] for (int i = hid_array.size() - 1; i > 0; --i) hdf5err = H5Gclose(hid_array[i]); - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(long* array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(long* array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, const std::vector < int >&i_array) { - try { + //try { int n = dimens.nels(); int *i_array_p = new int[n]; for (int i = 0; i < n; ++i) i_array_p[i] = i_array[i]; write(tag, dimens, i_array_p); delete[]i_array_p; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(vector array)"); - throw e; - } + //} catch(PSK::Exception & e) 
{ + // e.push("In HDF5OutputAdaptor::write(vector array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, const std::vector < long >&i_array) { - try { + //try { int n = dimens.nels(); long *i_array_p = new long[n]; for (int i = 0; i < n; ++i) i_array_p[i] = i_array[i]; write(tag, dimens, i_array_p); delete[]i_array_p; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(vector array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(vector array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, const int ***i_array) { if (dimens.size() != 3) { - PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(int*** array)"); - throw e; + eprintf("Dimens size not 3 for object %s", objname.c_str()); + //PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(int*** array)"); + //throw e; } - try { + //try { int nels = dimens.nels(); int *i_array_p = new int[nels]; const int di = dimens[0]; @@ -402,10 +415,10 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, i_array_p[i * djk + j * dk + k] = i_array[i][j][k]; write(objname, dimens, i_array_p); delete[]i_array_p; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(int*** array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(int*** array)"); + // throw e; + //} } @@ -418,7 +431,7 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, * */ void HDF5OutputAdaptor::write(const std::string & tag, float f_value) { - try { + //try { std::string ptag = purify_object_name(tag); std::vector < hid_t > hid_array; @@ -434,8 +447,9 @@ void HDF5OutputAdaptor::write(const std::string & tag, float f_value) { 1, hdf5dims, &f_value); if (hdf5err < 0) { - PSK::OutputException 
e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(float)"); - throw e; + eprintf("make_dataset fails for %s", tag.c_str()); + //PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(float)"); + //throw e; } // close groups, if any, but don't try to close the file id at [0] @@ -444,17 +458,18 @@ void HDF5OutputAdaptor::write(const std::string & tag, float f_value) { delete[]hdf5dims; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(float)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(float)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, const float *f_array) { - try { + //try { if (dimens.size() == 0) { - PSK::OutputException e("Zero Dimens size", "HDF5OutputAdaptor::write(float* array)"); - throw e; + eprintf("Zero Dimens size"); + //PSK::OutputException e("Zero Dimens size", "HDF5OutputAdaptor::write(float* array)"); + //throw e; } std::string ptag = purify_object_name(tag); @@ -473,41 +488,43 @@ void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, cons dimens.size(), hdf5dims, f_array); if (hdf5err < 0) { - PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(float* array)"); - throw e; + eprintf("make_dataset fails for %s", tag.c_str()); + //PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(float* array)"); + //throw e; } // close groups, if any, but don't try to close the file id at [0] for (int i = hid_array.size() - 1; i > 0; --i) hdf5err = H5Gclose(hid_array[i]); - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(float* array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(float* array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, const std::vector < float >&f_array) { - try { + //try { int n = 
dimens.nels(); float *f_array_p = new float[n]; for (int i = 0; i < n; ++i) f_array_p[i] = f_array[i]; write(tag, dimens, f_array_p); delete[]f_array_p; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(vector array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(vector array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, const float ***f_array) { if (dimens.size() != 3) { - PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(float*** array)"); - throw e; + eprintf("Dimens size not 3 for object %s", objname.c_str()); + //PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(float*** array)"); + //throw e; } - try { + //try { int nels = dimens.nels(); float *f_array_p = new float[nels]; const int di = dimens[0]; @@ -520,10 +537,10 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, f_array_p[i * djk + j * dk + k] = f_array[i][j][k]; write(objname, dimens, f_array_p); delete[]f_array_p; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(float*** array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(float*** array)"); + // throw e; + //} } @@ -536,7 +553,7 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, * */ void HDF5OutputAdaptor::write(const std::string & tag, double d_value) { - try { + //try { std::string ptag = purify_object_name(tag); std::vector < hid_t > hid_array; @@ -561,8 +578,9 @@ void HDF5OutputAdaptor::write(const std::string & tag, double d_value) { } if (hdf5err < 0) { - PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(double)"); - throw e; + eprintf("make_dataset fails for ", tag.c_str()); + //PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(double)"); + 
//throw e; } // close groups, if any, but don't try to close the file id at [0] @@ -571,17 +589,18 @@ void HDF5OutputAdaptor::write(const std::string & tag, double d_value) { delete[]hdf5dims; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(double)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(double)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, const double *d_array) { - try { + //try { if (dimens.size() == 0) { - PSK::OutputException e("Zero Dimens size", "HDF5OutputAdaptor::write(double* array)"); - throw e; + eprintf("Zero Dimens size"); + //PSK::OutputException e("Zero Dimens size", "HDF5OutputAdaptor::write(double* array)"); + //throw e; } std::string ptag = purify_object_name(tag); @@ -610,41 +629,43 @@ void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, cons if (hdf5err < 0) { - PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(double* array)"); - throw e; + eprintf("make_dataset fails for %s", tag.c_str()); + //PSK::OutputException e("make_dataset fails for " + tag, "HDF5OutputAdaptor::write(double* array)"); + //throw e; } // close groups, if any, but don't try to close the file id at [0] for (int i = hid_array.size() - 1; i > 0; --i) hdf5err = H5Gclose(hid_array[i]); - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(double* array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(double* array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, const std::vector < double >&d_array) { - try { + //try { int n = dimens.nels(); double *d_array_p = new double[n]; for (int i = 0; i < n; ++i) d_array_p[i] = d_array[i]; write(tag, dimens, d_array_p); delete[]d_array_p; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(vector array)"); - throw e; - } + 
//} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(vector array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, double ***d_array) { if (dimens.size() != 3) { - PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(double*** array)"); - throw e; + eprintf("Dimens size not 3 for object %s", objname.c_str()); + //PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(double*** array)"); + //throw e; } - try { + //try { int nels = dimens.nels(); double *d_array_p = new double[nels]; const int di = dimens[0]; @@ -666,20 +687,21 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, } write(objname, dimens, d_array_p); delete[]d_array_p; - } - catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(double*** array)"); - throw e; - } + //} + //catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(double*** array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, const int ns, double ****d_array) { if (dimens.size() != 3) { - PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(double**** array)"); - throw e; + eprintf("Dimens size not 3 for object %s", objname.c_str()); + //PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(double**** array)"); + //throw e; } - try { + //try { int nels = dimens.nels(); double *d_array_p = new double[nels]; const int di = dimens[0]; @@ -700,20 +722,21 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, } write(objname, dimens, d_array_p); delete[]d_array_p; - } - catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(double**** array)"); - throw e; - } + //} + //catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(double**** array)"); + // throw 
e; + //} } void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, double **d_array) { if (dimens.size() != 2) { - PSK::OutputException e("Dimens size not 2 for object " + objname, "HDF5OutputAdaptor::write(double** array)"); - throw e; + eprintf("Dimens size not 2 for object %s", objname.c_str()); + //PSK::OutputException e("Dimens size not 2 for object " + objname, "HDF5OutputAdaptor::write(double** array)"); + //throw e; } - try { + //try { int nels = dimens.nels(); double *d_array_p = new double[nels]; const int di = dimens[0]; @@ -723,19 +746,20 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, d_array_p[i * dj + j] = d_array[i + 1][j + 1]; // I am not writing ghost cells write(objname, dimens, d_array_p); delete[]d_array_p; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(double** array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In HDF5OutputAdaptor::write(double** array)"); + // throw e; + //} } void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, const int ns, double ***d_array) { if (dimens.size() != 2) { - PSK::OutputException e("Dimens size not 2 for object " + objname, "HDF5OutputAdaptor::write(double*** array)"); - throw e; + eprintf("Dimens size not 2 for object %s", objname.c_str()); + //PSK::OutputException e("Dimens size not 2 for object " + objname, "HDF5OutputAdaptor::write(double*** array)"); + //throw e; } - try { + //try { int nels = dimens.nels(); double *d_array_p = new double[nels]; const int di = dimens[0]; @@ -745,8 +769,8 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, d_array_p[i * dj + j] = d_array[i + 1][j + 1][ns]; // I am not writing ghost cells write(objname, dimens, d_array_p); delete[]d_array_p; - } catch(PSK::Exception & e) { - e.push("In HDF5OutputAdaptor::write(double*** array)"); - throw e; - } + //} catch(PSK::Exception & e) { + // e.push("In 
HDF5OutputAdaptor::write(double*** array)"); + // throw e; + //} } diff --git a/include/ConfigFile.h b/include/ConfigFile.h index 4de95342..79008be2 100644 --- a/include/ConfigFile.h +++ b/include/ConfigFile.h @@ -48,6 +48,7 @@ #include #include #include +#include "errors.h" using std::string; @@ -175,7 +176,8 @@ template < class T > T ConfigFile::read(const string & key) const { // Read the value corresponding to key mapci p = myContents.find(key); if (p == myContents.end()) - throw key_not_found(key); + eprintf("key not found: %s", key.c_str()); + //throw key_not_found(key); return string_as_T < T > (p->second); } diff --git a/include/PSKOutput.h b/include/PSKOutput.h index 604387cf..624fad4c 100644 --- a/include/PSKOutput.h +++ b/include/PSKOutput.h @@ -13,6 +13,7 @@ developers: D. Burgess, June/July 2006 #include #include +#include "errors.h" #include "PSKException.h" #include "Particles.h" #include "Field.h" @@ -103,49 +104,63 @@ namespace PSK { public: OutputAdaptor(void) {; } virtual void open(const std::string & outf) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::open"); + eprintf("Function not implemented"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::open"); } virtual void close(void) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::close"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::close"); } // write int functions virtual void write(const std::string & objname, int i) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(int)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(int)"); } virtual void write(const std::string & objname, const Dimens dimens, const int *i_array) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(int* 
array)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(int* array)"); } virtual void write(const std::string & objname, const Dimens dimens, const long *i_array) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(long* array)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(long* array)"); } virtual void write(const std::string & objname, const Dimens dimens, const std::vector < int >&i_array) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(vector array)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(vector array)"); } virtual void write(const std::string & objname, const Dimens dimens, const std::vector < long >&i_array) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(vector array)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(vector array)"); } // write float functions virtual void write(const std::string & objname, float f) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(float)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(float)"); } virtual void write(const std::string & objname, const Dimens dimens, const float *f_array) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(float* array)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(float* array)"); } virtual void write(const std::string & objname, const Dimens dimens, const std::vector < float >&f_array) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(vector array)"); + 
eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(vector array)"); } // write double functions virtual void write(const std::string & objname, double d) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(double)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(double)"); } virtual void write(const std::string & objname, const Dimens dimens, const double *d_array) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(double* array)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(double* array)"); } virtual void write(const std::string & objname, const Dimens dimens, const std::vector < double >&d_array) { - throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(vector array)"); + eprintf("Function not implemented"); + //throw OutputException("Function not implemented", "PSK::OutputAdaptor::write(vector array)"); } }; From 31ebd95fd2c28d938acc22d7e50ac1d80d472f65 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 25 Jul 2013 17:11:39 +0200 Subject: [PATCH 024/118] issue #43: implemented new array classes --- include/Alloc.h | 468 ++++++++++++++++++++---- include/arrays.h | 188 ++++++++++ include/asserts.h | 7 +- tests/Makefile | 38 ++ tests/stopwatch.h | 97 +++++ tests/test_arrays.cpp | 827 ++++++++++++++++++++++++++++++++++++++++++ utility/asserts.cpp | 3 + utility/debug.cpp | 11 +- 8 files changed, 1557 insertions(+), 82 deletions(-) create mode 100644 include/arrays.h create mode 100644 tests/Makefile create mode 100644 tests/stopwatch.h create mode 100644 tests/test_arrays.cpp diff --git a/include/Alloc.h b/include/Alloc.h index a57b9b70..a0837911 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -1,103 +1,415 @@ +#ifndef IPIC_ALLOC_H +#define IPIC_ALLOC_H +#include 
// for alignment stuff +#include "ipicdefs.h" // for CHECK_BOUNDS +#include "asserts.h" // for assert_le, assert_lt +//#include "arrays.h" // fixed-dimension arrays -#ifndef ALLOC_H -#define ALLOC_H +/* + Array classes developed by + Alec Johnson, + consolidating arrays developed by + Reger Ferrer, Vicenç Beltran, and Florentino Sainz + and earlier arrays defined by + Jorge Amaya and Stefano Markidis. -#include - -/*! The allocator for 4D array */ -template < class type > type **** _new_4_array(int sz1, int sz2, int sz3, int sz4) { - - type ****all_x; - type ***all_y; - type **all_z; - type *all_r; + For examples of use of this class, + see test_arrays.cpp +*/ +#define ALIGNMENT (64) +#ifdef __INTEL_COMPILER + #define ALIGNED(X) __assume_aligned(X, ALIGNMENT) + #define AlignedAlloc(T, NUM) \ + (T *const __restrict__)(_mm_malloc(sizeof(T)*NUM, ALIGNMENT)) + #define AlignedFree(S) (_mm_free(S)) +#else + #define ALIGNED(X) + #define AlignedFree(S) (delete[] S) + #define AlignedAlloc(T, NUM) (new T[NUM]) +#endif - all_x = new type ***[sz1]; - all_y = new type **[sz1 * sz2]; - all_z = new type *[sz1 * sz2 * sz3]; - all_r = new type[sz1 * sz2 * sz3 * sz4]; +// Compile with -DCHECK_BOUNDS to turn on bounds checking. +//#define CHECK_BOUNDS +#ifdef CHECK_BOUNDS + #define check_bounds(n,S) {assert_le(0, n); assert_lt(n, S);} +#else + #define check_bounds(n,S) +#endif - type ****result = all_x; +/*** begin Array classes with flexible dimensions ***/ - for (int i = 0; i < sz1; i++, all_y += sz2) { - result[i] = all_y; - for (int j = 0; j < sz2; j++, all_z += sz3) { - result[i][j] = all_z; - for (int k = 0; k < sz3; k++, all_r += sz4) { - result[i][j][k] = all_r; - } - } +// methods to allocate arrays. +// These are a succinct equivalent of Jorge's earler methods, +// except for the use of AlignedAlloc in place of new. 
+// +template < class type > +inline type * newArray1(size_t sz1) +{ + type *arr = AlignedAlloc(type, sz1); // new type [sz1]; + return arr; +} +template < class type > +inline type ** newArray2(size_t sz1, size_t sz2) +{ + type **arr = AlignedAlloc(type*, sz1); // new type *[sz1]; + type *ptr = newArray1(sz1*sz2); + for (size_t i = 0; i < sz1; i++) + { + arr[i] = ptr; + ptr += sz2; } - - return result; + return arr; } - -/*! Deallocator for 4D arrays */ -template < class type > void delArr4(type **** arr, int dummyx, int dummyy, int dummyz) { - delete[]arr[0][0][0]; - delete[]arr[0][0]; - delete[]arr[0]; - delete[]arr; +template < class type > +inline type *** newArray3(size_t sz1, size_t sz2, size_t sz3) +{ + type ***arr = AlignedAlloc(type**, sz1); // new type **[sz1]; + type **ptr = newArray2(sz1*sz2, sz3); + for (size_t i = 0; i < sz1; i++) + { + arr[i] = ptr; + ptr += sz2; + } + return arr; +} +template +inline type **** newArray4(size_t sz1, size_t sz2, size_t sz3, size_t sz4) +{ + type ****arr = AlignedAlloc(type***, sz1); //(new type ***[sz1]); + type ***ptr = newArray3(sz1*sz2, sz3, sz4); + for (size_t i = 0; i < sz1; i++) { + arr[i] = ptr; + ptr += sz2; + } + return arr; } -/*! 
The allocator for 3D array */ -template < class type > type *** _new_3_array(int sz1, int sz2, int sz3) { - - type ***all_x; - type **all_y; - type *all_z; - - all_x = new type **[sz1]; - all_y = new type *[sz1 * sz2]; - all_z = new type[sz1 * sz2 * sz3]; - - type ***result = all_x; +// methods to deallocate arrays +// +template < class type > inline void delArray1(type * arr) +{ AlignedFree(arr); } +template < class type > inline void delArray2(type ** arr) +{ delArray1(arr[0]); AlignedFree(arr); } +template < class type > inline void delArray3(type *** arr) +{ delArray2(arr[0]); AlignedFree(arr); } +template < class type > inline void delArray4(type **** arr) +{ delArray3(arr[0]); AlignedFree(arr); } +// +// versions with dummy dimensions (for backwards compatibility) +// +template inline void delArr1(type * arr) +{ delArray1(arr); } +template inline void delArr2(type ** arr, size_t sz1) +{ delArray2(arr); } +template inline void delArr3(type *** arr, size_t sz1, size_t sz2) +{ delArray3(arr); } +template inline void delArr4(type **** arr, + size_t sz1, size_t sz2, size_t sz3) +{ delArray3(arr); } - for (int i = 0; i < sz1; i++, all_y += sz2) { - result[i] = all_y; - for (int j = 0; j < sz2; j++, all_z += sz3) { - result[i][j] = all_z; - } +// classes to dereference arrays. +// +// ArrayRefN is essentially a dumbed-down version of ArrN with +// an index shift applied to the underlying array. The purpose +// of ArrayRefN is to allow elements of multidimensional arrays +// to be accessed with a calculated one-dimensional index while +// using chained operator[] syntax (e.g. myarr[i][j]), i.e. the +// same syntax as is used for native or nested arrays. This +// implementation is likely to be slow unless optimization is +// turned on, allowing the compiler to figure out that the whole +// chain of calls to the operator[] methods and to the ArrayRefN +// constructors reduces to computing a one-dimensional subscript +// used to access a one-dimensional array. 
+// +template +class ArrayRef1 +{ + type* const __restrict__ arr; + const size_t S1; + const size_t shift; + public: + inline ArrayRef1(type*const arr_, size_t k, size_t s1) : + arr(arr_), shift(k), S1(s1) + {} + inline type& operator[](size_t n1){ + check_bounds(n1, S1); + ALIGNED(arr); + return arr[shift+n1]; } +}; - return result; +template +class ArrayRef2 +{ + type* const __restrict__ arr; + const size_t shift; + const size_t S2, S1; + public: + inline ArrayRef2(type*const arr_, size_t k, size_t s2, size_t s1) : + arr(arr_), shift(k), S2(s2), S1(s1) + {} + inline ArrayRef1 operator[](size_t n2){ + check_bounds(n2,S2); + return ArrayRef1(arr, (shift+n2)*S1, S1); + } +}; -} +template +class ArrayRef3 +{ + type* const __restrict__ arr; + const size_t shift; + const size_t S3, S2, S1; + public: + inline ArrayRef3(type*const arr_, size_t k, size_t s3, size_t s2, size_t s1) : + arr(arr_), shift(k), S3(s3), S2(s2), S1(s1) + {} + inline ArrayRef2 operator[](size_t n3){ + check_bounds(n3, S3); + return ArrayRef2(arr, (shift+n3)*S2, S2, S1); + } +}; -/*! Deallocator for 3D arrays */ -template < class type > void delArr3(type *** arr, int dummyx, int dummyy) { - delete[]arr[0][0]; - delete[]arr[0]; - delete[]arr; -} +// ArrN can adopt an array allocated by newArrN +// +// The purpose of these classes is to provide more efficient +// and more regulated access to array elements. The idea is to +// maintain backward compatibility while allowing us to move +// toward a proper array abstraction. +// +// The user of ArrN is responsible for memory management. +// The ArrayN classes are the version of this class +// with automatic deallocation. 
+// +// Examples: +// +// Using constructor to create array: +// { +// Arr2 arr(16, 16); +// arr[1][2] = 5; +// arr.free(); +// } +// Using ArrN to adopt an array allocated by newArrN +// { +// int** array = newArray2(16,16) +// Arr2 arr(array,16,16); // adopt array +// arr[1][2] = 5; +// assert_eq(arr[1][2],array[1][2]); +// // arr.free(); // should not do both this and next line. +// delArray2(array); +// } +// +// proposed improvements: +// - methods that use parallel arithmetic for omp and vectorized code -/*! The allocator for 2D array */ -template < class type > type ** _new_2_array(int sz1, int sz2) { +template +class Arr1 +{ + private: // data + type* const __restrict__ arr; + const size_t S1; + public: + ~Arr1() { } + void free() { AlignedFree(arr); } + Arr1(size_t s1) : + S1(s1), + arr(AlignedAlloc(type, s1)) + { } + Arr1(type* in, + size_t s1) : + S1(s1), + arr(in) + { } + inline type& operator[](size_t n1){ + check_bounds(n1, S1); + ALIGNED(arr); + return arr[n1]; + } + inline size_t getidx(size_t n1) const + { + check_bounds(n1, S1); + return n1; + } + const type& get(size_t n1) const + { ALIGNED(arr); return arr[getidx(n1)]; } + type& fetch(size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n1)]; } + void set(size_t n1, type value) + { ALIGNED(arr); arr[getidx(n1)] = value; } +}; - type **all_x; - type *all_y; +template +class Arr2 +{ + private: // data + const size_t S2,S1; + type* const __restrict__ arr; + public: + ~Arr2(){} + void free() { AlignedFree(arr); } + Arr2(size_t s2, size_t s1) : + S2(s2), S1(s1), + arr(AlignedAlloc(type, s2*s1)) + { + } + Arr2(type*const* in, + size_t s2, size_t s1) : + S2(s2), S1(s1), + arr(*in) + { } + // for backwards compatibility support bracket notation + inline ArrayRef1 operator[](size_t n2){ + check_bounds(n2, S2); + return ArrayRef1(arr, n2*S1, S1); + } + inline size_t getidx(size_t n2, size_t n1) const + { + check_bounds(n2, S2); + check_bounds(n1, S1); + return n2*S1+n1; + } + // I prefer "fetch" 
over operator() to hilight read/write access + //type& operator()(size_t n2, size_t n1) const + // { ALIGNED(arr); return arr[n1+S1*n2]; } + type& fetch(size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n2,n1)]; } + // better to use accessors that distinguish read from write: + const type& get(size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n2,n1)]; } + void set(size_t n2,size_t n1, type value) + { ALIGNED(arr); arr[getidx(n2,n1)] = value; } +}; - all_x = new type *[sz1]; - all_y = new type[sz1 * sz2]; +template +class Arr3 +{ + private: // data + type* const __restrict__ arr; + const size_t S3,S2,S1; + public: + ~Arr3(){} + void free() { AlignedFree(arr); } + Arr3(size_t s3, size_t s2, size_t s1) : + S3(s3), S2(s2), S1(s1), + arr(AlignedAlloc(type, s3*s2*s1)) + { } + Arr3(type*const*const* in, + size_t s3, size_t s2, size_t s1) : + S3(s3), S2(s2), S1(s1), + arr(**in) + { } + inline ArrayRef2 operator[](size_t n3){ + check_bounds(n3, S3); + return ArrayRef2(arr, n3*S2, S2, S1); + } + type* get_arr(){return arr;} + inline size_t getidx(size_t n3, size_t n2, size_t n1) const + { + check_bounds(n3, S3); + check_bounds(n2, S2); + check_bounds(n1, S1); + return (n3*S2+n2)*S1+n1; + } + //type& operator()(size_t n3, size_t n2, size_t n1) const + //{ ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } + type& fetch(size_t n3,size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } + const type& get(size_t n3,size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } + void set(size_t n3,size_t n2,size_t n1, type value) + { ALIGNED(arr); arr[getidx(n3,n2,n1)] = value; } +}; - type **result = all_x; +template +class Arr4 +{ + private: // data + const size_t S4,S3,S2,S1; + type* const __restrict__ arr; + public: + ~Arr4(){} // nonempty destructor would kill performance + void free() { AlignedFree(arr); } + Arr4(size_t s4, size_t s3, size_t s2, size_t s1) : + arr(AlignedAlloc(type, s4*s3*s2*s1)), + S4(s4), S3(s3), 
S2(s2), S1(s1) + { } + Arr4(type*const*const*const* in, + size_t s4, size_t s3, size_t s2, size_t s1) : + S4(s4), S3(s3), S2(s2), S1(s1), + arr(***in) + { } + inline ArrayRef3 operator[](size_t n4){ + check_bounds(n4, S4); + return ArrayRef3(arr, n4*S3, S3, S2, S1); + } + inline size_t getidx(size_t n4, size_t n3, size_t n2, size_t n1) const + { + check_bounds(n4, S4); + check_bounds(n3, S3); + check_bounds(n2, S2); + check_bounds(n1, S1); + return ((n4*S3+n3)*S2+n2)*S1+n1; + } + const type& get(size_t n4,size_t n3,size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n4,n3,n2,n1)]; } + type& fetch(size_t n4,size_t n3,size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n4,n3,n2,n1)]; } + void set(size_t n4,size_t n3,size_t n2,size_t n1, type value) + { ALIGNED(arr); arr[getidx(n4,n3,n2,n1)] = value; } +}; - for (int i = 0; i < sz1; i++, all_y += sz2) { - result[i] = all_y; - } +// Versions of array classes which automatically free memory. +// +// Note that the nonempty destructor kills performance +// unless compiling with -fno-exceptions - return result; +template +struct Array1 : public Arr1 +{ + ~Array1(){Arr1::free();} + Array1(size_t s1) : Arr1(s1) { } +}; -} +template +struct Array2 : public Arr2 +{ + ~Array2(){Arr2::free();} + Array2(size_t s2, size_t s1) : Arr2(s2,s1) { } +}; -/*! 
Deallocator for 2D arrays */ -template < class type > void delArr2(type ** arr, int dummyx) { - delete[]arr[0]; - delete[]arr; -} +template +struct Array3 : public Arr3 +{ + ~Array3(){Arr3::free();} + Arr3& fast_accessor() { return *(Arr3*)this; } + Array3(size_t s3, size_t s2, size_t s1) : Arr3(s3,s2,s1) { } +}; -#define newArr4(type,sz1,sz2,sz3,sz4) _new_4_array((sz1),(sz2),(sz3),(sz4)) -#define newArr3(type,sz1,sz2,sz3) _new_3_array((sz1),(sz2),(sz3)) -#define newArr2(type,sz1,sz2) _new_2_array((sz1),(sz2)) +template +struct Array4 : public Arr4 +{ + ~Array4(){Arr4::free();} + Array4(size_t s4, size_t s3, size_t s2, size_t s1) + : Arr4(s4,s3,s2,s1) { } +}; +// These aliases are defined for the following flexibilization purposes: +// - to avoid filling the code with template brackets +// (i.e., to minimize explicitly template-dependent code). +// - so that they can be redefined according to the user's +// preferred array implementation. +// +typedef Arr1 intArr1; +typedef Arr2 intArr2; +typedef Arr3 intArr3; +typedef Arr4 intArr4; +typedef Arr1 doubleArr1; +typedef Arr2 doubleArr2; +typedef Arr3 doubleArr3; +typedef Arr4 doubleArr4; +// +#define newArr4(type,sz1,sz2,sz3,sz4) newArray4((sz1),(sz2),(sz3),(sz4)) +#define newArr3(type,sz1,sz2,sz3) newArray3((sz1),(sz2),(sz3)) +#define newArr2(type,sz1,sz2) newArray2((sz1),(sz2)) +/*** end Array classes with flexible dimensions ***/ #endif diff --git a/include/arrays.h b/include/arrays.h new file mode 100644 index 00000000..1a99b39f --- /dev/null +++ b/include/arrays.h @@ -0,0 +1,188 @@ +#ifndef IPIC_ARRAYS_H +#define IPIC_ARRAYS_H +#include "Alloc.h" // variable-dimension arrays +/* + Fixed array class developed by + Alec Johnson + + For examples of use of this class, + see test_arrays.cpp +*/ + +/*** begin FixedArray classes for use when dimensions are known at compile time. 
***/ +// +// These classes improve upon native fixed arrays as follows: +// - bounds-checking is performed if CHECK_BOUNDS is defined, +// - myarray(i,j) access is supported, and +// - functions can return fixed-dimension arrays, +// whereas since the C standard does not allow +// one to return an array with fixed dimensions. +// +// The purpose of implementing these extensions is so that +// fixed-dimension arrays can be used in iPic3D if doing so +// yields a significant performance benefit for the choice of +// architecture and compiler. + +template +class FixedArray1D +{ + public: + type arr[s1]; + public: + type& fetch(size_t n1) + { + check_bounds(n1,s1); + return arr[n1]; + } + type& operator[](size_t n1) + { + check_bounds(n1,s1); + return arr[n1]; + } +}; + +// auxiliary class for chained operator[] dereferencing of FixedArray2D +// +template +class FixedArray2D1 +{ + type (&arr)[s1][s2]; + size_t n1; + public: + FixedArray2D1(type (&_arr)[s1][s2], size_t _n1) : + arr(_arr), n1(_n1) {}; + type& operator[](size_t n2) + { + check_bounds(n1,s1); + check_bounds(n2,s2); + return arr[n1][n2]; + } +}; + +template +class FixedArray2D +{ + public: + type arr [s1][s2]; + public: + type& fetch(size_t n1, size_t n2) + { + check_bounds(n1,s1); + check_bounds(n2,s2); + return arr[n1][n2]; + } + // Chaining operator[] this way + // does not allow bounds checking + // and does not work beyond 2D. 
+ //type* operator[](size_t n1) { return arr[n1]; } + FixedArray2D1 operator[](size_t n1) + { return FixedArray2D1(arr,n1); } +}; + +// auxiliary classes for chained operator[] dereferencing of FixedArray3D +// +template +class FixedArray3D1 +{ + type (&arr)[s1][s2][s3]; + size_t n1, n2; + public: + FixedArray3D1(type (&_arr)[s1][s2][s3], size_t _n1, size_t _n2) : + arr(_arr), n1(_n1), n2(_n2) {}; + type& operator[](size_t n3) + { + check_bounds(n1,s1); + check_bounds(n2,s2); + check_bounds(n3,s3); + return arr[n1][n2][n3]; + } +}; +// +template +class FixedArray3D2 +{ + type (&arr)[s1][s2][s3]; + size_t n1; + public: + FixedArray3D2(type (&_arr)[s1][s2][s3], size_t _n1) : arr(_arr), n1(_n1) {}; + FixedArray3D1 operator[](size_t n2) + { return FixedArray3D1(arr,n1,n2); } +}; + +template +struct FixedArray3D +{ + type arr [s1][s2][s3]; + public: + type& fetch(size_t n1, size_t n2, size_t n3) + { + check_bounds(n1,s1); + check_bounds(n2,s2); + check_bounds(n3,s3); + return arr[n1][n2][n3]; + } + // chained operator[] dereferencing requires + // auxiliary array dereferencing classes, + // since the C standard does not allow one to + // return an array with fixed dimensions. 
+ FixedArray3D2 operator[](size_t n1) + { return FixedArray3D2(arr,n1); } +}; + +// auxiliary classes for chained operator[] dereferencing of FixedArray4D +// +template +class FixedArray4D1 +{ + type (&arr)[s1][s2][s3][s4]; + size_t n1,n2,n3; + public: + FixedArray4D1(type(&_arr)[s1][s2][s3][s4],size_t _n1,size_t _n2,size_t _n3): + arr(_arr), n1(_n1), n2(_n2), n3(_n3){}; + type& operator[](size_t n4) { return arr[n1][n2][n3][n4]; } +}; +// +template +class FixedArray4D2 +{ + type (&arr)[s1][s2][s3][s4]; + size_t n1,n2; + public: + FixedArray4D2(type (&_arr)[s1][s2][s3][s4], size_t _n1, size_t _n2) : + arr(_arr), n1(_n1), n2(_n2){}; + FixedArray4D1 operator[](size_t n3) + { return FixedArray4D1(arr,n1,n2,n3); } +}; +// +template +class FixedArray4D3 +{ + type (&arr)[s1][s2][s3][s4]; + size_t n1; + public: + FixedArray4D3(type (&_arr)[s1][s2][s3][s4], size_t _n1) : + arr(_arr), n1(_n1) {}; + FixedArray4D2 operator[](size_t n2) + { return FixedArray4D2(arr,n1,n2); } +}; + +template +class FixedArray4D +{ + public: + type arr [s1][s2][s3][s4]; + public: + type& fetch(size_t n1, size_t n2, size_t n3, size_t n4) + { + check_bounds(n1,s1); + check_bounds(n2,s2); + check_bounds(n3,s3); + check_bounds(n4,s4); + return arr[n1][n2][n3][n4]; + } + FixedArray4D3 operator[](size_t n1) + { return FixedArray4D3(arr,n1); } +}; +/*** end FixedArray classes for use when dimensions are known at compile time. 
***/ + +#endif diff --git a/include/asserts.h b/include/asserts.h index a3cd4584..0a8e3b40 100644 --- a/include/asserts.h +++ b/include/asserts.h @@ -107,8 +107,11 @@ void assert_error(const char* file, int line, const char* func, \ const char* op, const char* lhs_str, const char* rhs_str, \ t1 lhs, t2 rhs); -declare_assert_errmsg(double, double); // this seems enough for all numbers -declare_assert_errmsg(int, int); // but maybe this is more efficient +declare_assert_errmsg(double, double); +declare_assert_errmsg(size_t, size_t); +declare_assert_errmsg(int, size_t); +declare_assert_errmsg(size_t, int); +declare_assert_errmsg(int, int); declare_assert_errmsg(const char *, const char *); // put in assert_string.h: // #include "assert.h" diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 00000000..10559ab9 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,38 @@ + +INCLUDE = -I../include + +OBJECTS = \ + test_arrays.o \ + ../utility/asserts.o \ + debug.o + +FLAGS = -O3 -DNO_MPI -fno-exceptions #-DCHECK_BOUNDS -ggdb + +COMPILER = g++ #icpc # g++ + +test: clean_test_arrays test_arrays + +test_arrays: clean_test_arrays $(OBJECTS) + $(COMPILER) $(FLAGS) $(INCLUDE) $(OBJECTS) -o test_arrays + +test_arrays.o: + $(COMPILER) $(FLAGS) -c test_arrays.cpp $(INCLUDE) -o test_arrays.o + + +../utility/asserts.o: + $(COMPILER) $(FLAGS) -c ../utility/asserts.cpp $(INCLUDE) -o ../utility/asserts.o + +debug.o: + $(COMPILER) $(FLAGS) -c ../utility/debug.cpp $(INCLUDE) -o debug.o + +clean: clean_old_test_arrays clean_test_arrays + rm -f test_arrays $(OBJECTS) + +clean_test_arrays: + rm -f test_arrays test_arrays.o + +old_test_arrays: clean_old_test_arrays + $(COMPILER) $(FLAGS) old_test_arrays.cpp -o old_test_arrays + +clean_old_test_arrays: + rm -f old_test_arrays old_test_arrays.o diff --git a/tests/stopwatch.h b/tests/stopwatch.h new file mode 100644 index 00000000..c8bfbc5b --- /dev/null +++ b/tests/stopwatch.h @@ -0,0 +1,97 @@ +#include +#include +#include + 
+#define myuint64_t int +//#define myuint64_t uint64_t + +static inline myuint64_t tv_to_sec(struct timeval tv){ + return tv.tv_sec + tv.tv_usec/1000000; +} + +static inline myuint64_t tv_to_ms(struct timeval tv){ + return tv.tv_sec*1000 + tv.tv_usec/1000; +} + +static inline myuint64_t tv_to_us(struct timeval tv){ + return tv.tv_sec*1000000 + tv.tv_usec; +} + + +static inline struct timeval add_tv(const struct timeval a, const struct timeval b){ + const struct timeval res = { a.tv_sec+b.tv_sec, a.tv_usec+b.tv_usec }; + return res; +} + +static inline struct timeval diff_tv(const struct timeval start, const struct timeval stop){ + const struct timeval diff = {stop.tv_sec - start.tv_sec, stop.tv_usec - start.tv_usec}; + return diff; +} + +typedef enum {START, STOP, LAP, RESET} sw_action_t; +static inline int valid_sw_action(sw_action_t t){ + return t == START || t == STOP || t == LAP || RESET; +} + + +typedef enum {OFF, STARTED, STOPPED} sw_state_t; +static inline int valid_sw_state(sw_state_t s){ + return s == OFF || s == STOPPED || s == STARTED; +} + + +typedef struct { + sw_state_t state; + struct timeval total, now, last; +} stopwatch_t; + +static inline struct timeval sw_start(stopwatch_t * const sw ){ + sw->state = STARTED; + gettimeofday( &sw->now, 0 ); + sw->last = sw->now; + return sw->total; +} + +static inline struct timeval sw_stop(stopwatch_t * const sw ){ + sw->state = STOPPED; + gettimeofday( &sw->now, 0 ); + sw->total = add_tv(sw->total, diff_tv(sw->last, sw->now)); + sw->last = sw->now; + return sw->total; +} + +static inline struct timeval sw_lap(stopwatch_t * const sw ){ + gettimeofday( &sw->now, 0 ); + const struct timeval elapsed = diff_tv(sw->last, sw->now); + sw->total = add_tv(sw->total, elapsed); + sw->last = sw->now; + return elapsed; +} + +static inline struct timeval sw_reset(stopwatch_t * const sw ){ + const static stopwatch_t sw_off = { OFF, {0, 0}, {0, 0}, {0, 0} }; + sw->state = OFF; + *sw = sw_off; + return sw->total; +} + +static 
inline struct timeval stopwatch_mt(stopwatch_t * const sw, sw_action_t action){ + typedef struct timeval stopwatch_func_t( stopwatch_t *sw ); + + static stopwatch_func_t * const stopwatch_transitions[3][4] = { {sw_start, 0, 0, 0}, + {0, sw_stop, sw_lap, 0}, + {sw_start, 0, 0, sw_reset}}; + assert(sw != 0); + assert(valid_sw_action(action)); + assert(valid_sw_state(sw->state)); + assert(stopwatch_transitions[sw->state][action] != 0); + return stopwatch_transitions[sw->state][action](sw); +} + + +struct timeval stopwatch(sw_action_t action){ + static stopwatch_t sw = { OFF, {0, 0}, {0, 0}, {0, 0} }; + return stopwatch_mt(&sw, action); +} + + diff --git a/tests/test_arrays.cpp b/tests/test_arrays.cpp new file mode 100644 index 00000000..45cdc92c --- /dev/null +++ b/tests/test_arrays.cpp @@ -0,0 +1,827 @@ +/* + Reger Ferrer + Vicenç Beltran + Alec Johnson +*/ + +#include +#include +#include +#include "stopwatch.h" +#include "arrays.h" +#include "Alloc.h" +#include "asserts.h" +#include "debug.h" + +/**** begin Jorge Amaya's array allocation methods ****/ + +/*! The allocator for 4D array */ +template < class type > type **** newArray4_Amaya(int sz1, int sz2, int sz3, int sz4) { + + type ****all_x; + type ***all_y; + type **all_z; + type *all_r; + + all_x = new type ***[sz1]; + all_y = new type **[sz1 * sz2]; + all_z = new type *[sz1 * sz2 * sz3]; + all_r = new type[sz1 * sz2 * sz3 * sz4]; + + type ****result = all_x; + + for (int i = 0; i < sz1; i++, all_y += sz2) { + result[i] = all_y; + for (int j = 0; j < sz2; j++, all_z += sz3) { + result[i][j] = all_z; + for (int k = 0; k < sz3; k++, all_r += sz4) { + result[i][j][k] = all_r; + } + } + } + + return result; +} + +/*! Deallocator for 4D arrays */ +template < class type > void delArr4_Amaya(type **** arr, int dummyx, int dummyy, int dummyz) { + delete[]arr[0][0][0]; + delete[]arr[0][0]; + delete[]arr[0]; + delete[]arr; +} + +/*! 
The allocator for 3D array */ +template < class type > type *** newArray3_Amaya(int sz1, int sz2, int sz3) { + + type ***all_x; + type **all_y; + type *all_z; + + all_x = new type **[sz1]; + all_y = new type *[sz1 * sz2]; + all_z = new type[sz1 * sz2 * sz3]; + + type ***result = all_x; + + for (int i = 0; i < sz1; i++, all_y += sz2) { + result[i] = all_y; + for (int j = 0; j < sz2; j++, all_z += sz3) { + result[i][j] = all_z; + } + } + + return result; + +} + +/*! Deallocator for 3D arrays */ +template < class type > void delArr3_Amaya(type *** arr, int dummyx, int dummyy) { + delete[]arr[0][0]; + delete[]arr[0]; + delete[]arr; +} + +/*! The allocator for 2D array */ +template < class type > type ** newArr2_Amaya(int sz1, int sz2) { + + type **all_x; + type *all_y; + + all_x = new type *[sz1]; + all_y = new type[sz1 * sz2]; + + type **result = all_x; + + for (int i = 0; i < sz1; i++, all_y += sz2) { + result[i] = all_y; + } + + return result; + +} + +/*! Deallocator for 2D arrays */ +template < class type > void delArr2_Amaya(type ** arr, int dummyx) { + delete[]arr[0]; + delete[]arr; +} + +#define newArr4_Amaya(type,sz1,sz2,sz3,sz4) newArray4_Amaya((sz1),(sz2),(sz3),(sz4)) +#define newArr3_Amaya(type,sz1,sz2,sz3) newArray3_Amaya((sz1),(sz2),(sz3)) +#define newArr2_Amaya(type,sz1,sz2) newArray2_Amaya((sz1),(sz2)) + +/**** end Jorge Amaya's array allocation methods ****/ + +/****** begin (i,j) arrays from Reger Ferrer and Vicenç Beltran ******/ + +template +class Rank1 +{ + const size_t S1; + type * __restrict__ const arr; + +public: + + Rank1(size_t s1) : S1(s1), arr(AlignedAlloc(type, s1)) {} + + //Rank1( const Rank1& other ) : S1( other.S1 ), arr( other.arr ) {} + + type& operator()(size_t n1) const + { + ALIGNED(arr); + return arr[n1]; + } + + size_t dim1() const { return S1; } + + ~Rank1() { }; +}; + +template +class Rank2 +{ + const size_t S1, S2; + type * __restrict__ const arr; + + +public: + Rank2(size_t s1, size_t s2) : S1(s1), S2(s2), 
arr(AlignedAlloc(type, s1*s2)) {} + + //Rank2( const Rank2& other ) : S1( other.S1 ), S2( other.S2 ), arr( other.arr ) {} + + type& operator()(size_t n1, size_t n2) const + { + check_bounds(n1,S1); + check_bounds(n2,S2); + ALIGNED(arr); + return arr[n2+S2*n1]; + } + type& fetch(size_t n1,size_t n2) const + { + check_bounds(n1,S1); + check_bounds(n2,S2); + ALIGNED(arr); + return arr[n2+S2*n1]; + } + + size_t dim1() const { return S1; } + size_t dim2() const { return S2; } + + void free() { + AlignedFree(arr); + } + + ~Rank2() { }; +}; + +template +class Rank3 +{ + const size_t S1, S2, S3; + type * const __restrict__ arr; + + +public: + + Rank3(size_t s1, size_t s2, size_t s3) : S1(s1), S2(s2), S3(s3), + arr(AlignedAlloc(type, s1*s2*s3)) {} + + //Rank3( const Rank3& other ) : S1( other.S1 ), S2( other.S2 ), S3( other.S3 ), + //arr( other.arr ) {} + + type& operator()(size_t n1, size_t n2, size_t n3) const + { + check_bounds(n1,S1); + check_bounds(n2,S2); + check_bounds(n3,S3); + ALIGNED(arr); + return arr[n3+S3*(n2+S2*n1)]; + } + type& fetch(size_t n1, size_t n2, size_t n3) const + { + check_bounds(n1,S1); + check_bounds(n2,S2); + check_bounds(n3,S3); + ALIGNED(arr); + return arr[n3+S3*(n2+S2*n1)]; + } + const type& get(size_t n1, size_t n2, size_t n3) const + { + check_bounds(n1,S1); + check_bounds(n2,S2); + check_bounds(n3,S3); + ALIGNED(arr); + return arr[n3+S3*(n2+S2*n1)]; + } + + ~Rank3() { } + + size_t dim1() const { return S1; } + size_t dim2() const { return S2; } + size_t dim3() const { return S3; } + + void free() { + AlignedFree(arr); + } +}; + +template +class Rank4 +{ + const size_t S1, S2, S3, S4; + type* __restrict__ const arr; + +public: + + Rank4(size_t s1, size_t s2, size_t s3, size_t s4) : S1(s1), S2(s2), S3(s3), S4(s4), + arr(AlignedAlloc(type, s1*s2*s3*s4)) {} + + //Rank4( const Rank4& other ) : S1( other.S1 ), S2( other.S2 ), S3( other.S3 ), S4( other.S4 ), + //arr( other.arr ) {} + + type& operator()(size_t n1, size_t n2, size_t n3, size_t n4) 
const + { + check_bounds(n1,S1); + check_bounds(n2,S2); + check_bounds(n3,S3); + check_bounds(n4,S4); + ALIGNED(arr); + return arr[n4+S4*(n3+S3*(n2+S2*n1))]; + } + + ~Rank4() { } + + size_t dim1() const { return S1; } + size_t dim2() const { return S2; } + size_t dim3() const { return S3; } + size_t dim4() const { return S4; } + + void free() { AlignedFree(arr); } + +}; + +/******** end (i,j) arrays from Reger Ferrer and Vicenç Beltran ******/ + +/****** begin [i][j] arrays from Reger Ferrer and Vicenç Beltran ******/ + +template +class BracketRank1 +{ + const size_t S1; + type * __restrict__ const arr; + +public: + BracketRank1(size_t s1, void * __restrict__ const storage) : S1(s1), + arr(reinterpret_cast(storage)){} + + BracketRank1(size_t s1) : S1(s1), arr(new type[s1]){} + + type& operator[](size_t i) const + { + return arr[i]; + } + +}; + +template +class BracketRank2 +{ + const size_t S1, S2; + type * __restrict__ const arr; + +public: + void free(){ delete[] arr; } + BracketRank2(size_t s1, size_t s2, void *storage) : S1(s1), S2(s2), + arr(reinterpret_cast(storage)){} + + BracketRank2(size_t s1, size_t s2) : S1(s1), S2(s2), + arr(new type[s1*s2]) {} + + BracketRank1 operator[](size_t i) const + { + return BracketRank1(S2, arr + i * S2); + } + type& operator()(size_t n1, size_t n2) const + { + ALIGNED(arr); + return arr[n2+S2*n1]; + } +}; + +/******** end [i][j] arrays from Reger Ferrer and Vicenç Beltran ******/ + +using namespace std; + +template +void testArr2_diagonal() +{ + const int ITERS = 10000; + const size_t dim1 = 64; + const size_t dim2 = 64; + + BracketRank2 Abra(dim1, dim2); + BracketRank2 Bbra(dim1, dim2); + BracketRank2 Cbra(dim1, dim2); + + Rank2 Apar(dim1, dim2); + Rank2 Bpar(dim1, dim2); + Rank2 Cpar(dim1, dim2); + + FixedArray2D Afix ; + FixedArray2D Bfix ; + FixedArray2D Cfix ; + + type** Aold = newArr2(type, dim1, dim2); + type** Bold = newArr2(type, dim1, dim2); + type** Cold = newArr2(type, dim1, dim2); + + Arr2 Aarr(dim1, dim2); + 
Arr2 Barr(dim1, dim2); + Arr2 Carr(dim1, dim2); + + printf("Initializing data ...\n"); + for(size_t i=0; i +void testArr2() +{ + const int ITERS = 10000; + const size_t dim1 = 64; + const size_t dim2 = 64; + + BracketRank2 Abra(dim1, dim2); + BracketRank2 Bbra(dim1, dim2); + BracketRank2 Cbra(dim1, dim2); + + Rank2 Apar(dim1, dim2); + Rank2 Bpar(dim1, dim2); + Rank2 Cpar(dim1, dim2); + + FixedArray2D Afix ; + FixedArray2D Bfix ; + FixedArray2D Cfix ; + + type** Aold = newArr2(type, dim1, dim2); + type** Bold = newArr2(type, dim1, dim2); + type** Cold = newArr2(type, dim1, dim2); + + Arr2 Aarr(dim1, dim2); + Arr2 Barr(dim1, dim2); + Arr2 Carr(dim1, dim2); + + printf("Initializing data ...\n"); + for(size_t i=0; i +void testArr3() +{ + const int ITERS = 100; + const size_t dim1 = 64; + const size_t dim2 = 64; + const size_t dim3 = 64; + + Rank3 Apar(dim1, dim2, dim3); + Rank3 Bpar(dim1, dim2, dim3); + Rank3 Cpar(dim1, dim2, dim3); + + FixedArray3D Afix ; + FixedArray3D Bfix ; + FixedArray3D Cfix ; + + type*** Aold = newArr3(type, dim1, dim2, dim3); + type*** Bold = newArr3(type, dim1, dim2, dim3); + type*** Cold = newArr3(type, dim1, dim2, dim3); + + //Array3 Aarr(dim1, dim2, dim3); + //Array3 Barr(dim1, dim2, dim3); + //Array3 Carr(dim1, dim2, dim3); + Arr3 Aarr(dim1, dim2, dim3); + Arr3 Barr(dim1, dim2, dim3); + Arr3 Carr(dim1, dim2, dim3); + + printf("Initializing data ...\n"); + for(size_t i=0; i +void testArr4() +{ + // For some bizarre reason, if I comment out the code for the + // "fbr" and "fpa" arrays below then icpc on knc2 is somehow + // able to figure out that each iteration does the same thing + // in the case of Arr4, but not in the case of the chained + // pointer or fixed-dimension arrays. Why not? And why + // does this optimization occur for four-dimensional arrays + // and not for 3- or 2-dimensional arrays? And why is this + // optimization no longer performed if "fbr" and "fpa" stuff + // is included? The times are baffling. 
+ const int ITERS = 1; + const size_t dim1 = 16; + const size_t dim2 = 16; + const size_t dim3 = 16; + const size_t dim4 = 16; + + FixedArray4D Afix; + FixedArray4D Bfix; + FixedArray4D Cfix; + + type**** Aold = newArr4(type, dim1, dim2, dim3, dim4); + type**** Bold = newArr4(type, dim1, dim2, dim3, dim4); + type**** Cold = newArr4(type, dim1, dim2, dim3, dim4); + + //Array4 Afbr(dim1, dim2, dim3, dim4); + //Array4 Bfbr(dim1, dim2, dim3, dim4); + //Array4 Cfbr(dim1, dim2, dim3, dim4); + + //Array4 Afpa(dim1, dim2, dim3, dim4); + //Array4 Bfpa(dim1, dim2, dim3, dim4); + //Array4 Cfpa(dim1, dim2, dim3, dim4); + + Arr4 Abra(dim1, dim2, dim3, dim4); + Arr4 Bbra(dim1, dim2, dim3, dim4); + Arr4 Cbra(dim1, dim2, dim3, dim4); + + Arr4 Apar(dim1, dim2, dim3, dim4); + Arr4 Bpar(dim1, dim2, dim3, dim4); + Arr4 Cpar(dim1, dim2, dim3, dim4); + + printf("Initializing data ...\n"); + for(size_t i=0; i (diagonal) ===\n"); + //testArr2_diagonal(); + //printf("=== testing Arr2 (diagonal) ===\n"); + //testArr2_diagonal(); + printf("=== testing Arr2 ===\n"); + testArr2(); + printf("=== testing Arr2 ===\n"); + testArr2(); + printf("=== testing Arr3 ===\n"); + testArr3(); + printf("=== testing Arr3 ===\n"); + testArr3(); + printf("=== testing Arr4 ===\n"); + testArr4(); + printf("=== testing Arr4 ===\n"); + testArr4(); +} diff --git a/utility/asserts.cpp b/utility/asserts.cpp index 576944f4..cf3716b6 100644 --- a/utility/asserts.cpp +++ b/utility/asserts.cpp @@ -19,5 +19,8 @@ void assert_error(const char *file, int line, const char *func, const char *op, abort(); \ } +implement_assert_errmsg(size_t, size_t); +implement_assert_errmsg(int, size_t); +implement_assert_errmsg(size_t, int); implement_assert_errmsg(int, int); implement_assert_errmsg(const char *, const char *); diff --git a/utility/debug.cpp b/utility/debug.cpp index 306775fb..50ad6e01 100644 --- a/utility/debug.cpp +++ b/utility/debug.cpp @@ -1,5 +1,7 @@ -#include "MPIdata.h" // for get_rank +#ifndef NO_MPI + #include 
"MPIdata.h" // for get_rank +#endif #include "debug.h" #define implement_dprintvar_fileLine(code,type) \ @@ -17,8 +19,13 @@ void dfprintf_fileLine(FILE * fptr, const char *func, const char *file, int line fflush(fptr); va_list args; va_start(args, format); - fprintf(fptr, "(%d) DEBUG %s(), %s:%d: ", + fprintf(fptr, +#ifndef NO_MPI + "(%d) DEBUG %s(), %s:%d: ", MPIdata::get_rank(), +#else + "DEBUG %s(), %s:%d: ", +#endif func, file, // my_basename(file), line_number); /* print out remainder of message */ From 9b4962a7b08e7fd957ad1c6b2f83946f910ed5ef Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 2 Aug 2013 18:23:24 +0200 Subject: [PATCH 025/118] fixed compiler errors introduced in previous commit (g++) --- fields/EMfields3D.cpp | 2 +- include/Alloc.h | 1 - include/asserts.h | 1 + particles/Particles3D.cpp | 2 +- particles/Particles3Dcomm.cpp | 3 +-- utility/asserts.cpp | 1 + 6 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 7be94a18..e6600592 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -208,7 +208,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop // const long long nop_ll = pcls.getNOP(); const int nop = pcls.getNOP(); - assert_le(nop_ll,INT_MAX); // else would need to use long long + assert_le(nop_ll, (long long) INT_MAX); // else would need to use long long // To make memory use scale to a large number of threads, we // could first apply an efficient parallel sorting algorithm // to the particles and then accumulate moments in smaller diff --git a/include/Alloc.h b/include/Alloc.h index a0837911..33b70b1b 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -1,7 +1,6 @@ #ifndef IPIC_ALLOC_H #define IPIC_ALLOC_H #include // for alignment stuff -#include "ipicdefs.h" // for CHECK_BOUNDS #include "asserts.h" // for assert_le, assert_lt //#include "arrays.h" // fixed-dimension arrays diff --git a/include/asserts.h b/include/asserts.h index 
0a8e3b40..e46b9c77 100644 --- a/include/asserts.h +++ b/include/asserts.h @@ -112,6 +112,7 @@ declare_assert_errmsg(size_t, size_t); declare_assert_errmsg(int, size_t); declare_assert_errmsg(size_t, int); declare_assert_errmsg(int, int); +declare_assert_errmsg(long long, long long); declare_assert_errmsg(const char *, const char *); // put in assert_string.h: // #include "assert.h" diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index cc83f3f0..7a4d01d0 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -329,7 +329,7 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { const double dto2 = .5 * dt, qomdt2 = qom * dto2 / c; const double inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; - assert_le(nop,INT_MAX); // else would need to use long long + assert_le(nop,(long long) INT_MAX); // else would need to use long long // don't bother trying to push any particles simultaneously; // MIC already does vectorization automatically, and trying // to do it by hand only hurts performance. diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 3e5c1ede..e9c73aaa 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -292,13 +292,12 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc const double nxn = grid->getNXN(); const double nyn = grid->getNYN(); const double nzn = grid->getNZN(); - assert_le(nop,INT_MAX); // else would need to use long long + assert_le(nop,(long long)INT_MAX); // else would need to use long long // to make memory use scale to a large number of threads we // could first apply an efficient parallel sorting algorithm // to the particles and then accumulate moments in smaller // subarrays. 
{ - assert_le(nop,INT_MAX); // else would need to use long long for (int i = 0; i < nop; i++) { const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); diff --git a/utility/asserts.cpp b/utility/asserts.cpp index cf3716b6..312fe79b 100644 --- a/utility/asserts.cpp +++ b/utility/asserts.cpp @@ -23,4 +23,5 @@ implement_assert_errmsg(size_t, size_t); implement_assert_errmsg(int, size_t); implement_assert_errmsg(size_t, int); implement_assert_errmsg(int, int); +implement_assert_errmsg(long long, long long); implement_assert_errmsg(const char *, const char *); From 522138871a9bcd007c89b8b630823d864e577510 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 5 Aug 2013 15:47:26 +0200 Subject: [PATCH 026/118] implemented use of doubleArr3 for Moments class --- fields/EMfields3D.cpp | 44 +++++++++++------------ fields/Moments.cpp | 32 ----------------- include/Alloc.h | 21 ++++++++--- include/Moments.h | 83 ++++++++++++++++++++++++------------------- 4 files changed, 86 insertions(+), 94 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index e6600592..d430aae2 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -169,8 +169,8 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) { momentsArray = new Moments*[sizeMomentsArray]; for(int i=0;iinit(nxn,nyn,nzn); + momentsArray[i] = new Moments(nxn,nyn,nzn); + //momentsArray[i]->init(nxn,nyn,nzn); } } @@ -220,16 +220,16 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop Moments& speciesMoments = fetch_momentsArray(thread_num); speciesMoments.set_to_zero(); // - double*** rho = speciesMoments.fetch_rho(); - double*** Jx = speciesMoments.fetch_Jx(); - double*** Jy = speciesMoments.fetch_Jy(); - double*** Jz = speciesMoments.fetch_Jz(); - double*** Pxx = speciesMoments.fetch_Pxx(); - double*** Pxy = speciesMoments.fetch_Pxy(); - double*** Pxz = speciesMoments.fetch_Pxz(); - double*** Pyy = speciesMoments.fetch_Pyy(); - double*** Pyz = 
speciesMoments.fetch_Pyz(); - double*** Pzz = speciesMoments.fetch_Pzz(); + doubleArr3& rho = speciesMoments.fetch_rho(); + doubleArr3& Jx = speciesMoments.fetch_Jx(); + doubleArr3& Jy = speciesMoments.fetch_Jy(); + doubleArr3& Jz = speciesMoments.fetch_Jz(); + doubleArr3& Pxx = speciesMoments.fetch_Pxx(); + doubleArr3& Pxy = speciesMoments.fetch_Pxy(); + doubleArr3& Pxz = speciesMoments.fetch_Pxz(); + doubleArr3& Pyy = speciesMoments.fetch_Pyy(); + doubleArr3& Pyz = speciesMoments.fetch_Pyz(); + doubleArr3& Pzz = speciesMoments.fetch_Pzz(); // The following loop is expensive, so it is wise to assume that the // compiler is stupid. Therefore we should on the one hand // expand things out and on the other hand avoid repeating computations. @@ -389,16 +389,16 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop // One-dimensional array access is presumably // more efficient on poor compilers. // - const double*const rho1d = rho[0][0]; - const double*const Jx1d = Jx [0][0]; - const double*const Jy1d = Jy [0][0]; - const double*const Jz1d = Jz [0][0]; - const double*const Pxx1d = Pxx[0][0]; - const double*const Pxy1d = Pxy[0][0]; - const double*const Pxz1d = Pxz[0][0]; - const double*const Pyy1d = Pyy[0][0]; - const double*const Pyz1d = Pyz[0][0]; - const double*const Pzz1d = Pzz[0][0]; + doubleArr1 rho1d = rho.fetch_Arr1(); + doubleArr1 Jx1d = Jx .fetch_Arr1(); + doubleArr1 Jy1d = Jy .fetch_Arr1(); + doubleArr1 Jz1d = Jz .fetch_Arr1(); + doubleArr1 Pxx1d = Pxx.fetch_Arr1(); + doubleArr1 Pxy1d = Pxy.fetch_Arr1(); + doubleArr1 Pxz1d = Pxz.fetch_Arr1(); + doubleArr1 Pyy1d = Pyy.fetch_Arr1(); + doubleArr1 Pyz1d = Pyz.fetch_Arr1(); + doubleArr1 Pzz1d = Pzz.fetch_Arr1(); // assert_eq(speciesMoments.get_nx(), nxn); assert_eq(speciesMoments.get_ny(), nyn); diff --git a/fields/Moments.cpp b/fields/Moments.cpp index 5565929b..6c31a2f1 100644 --- a/fields/Moments.cpp +++ b/fields/Moments.cpp @@ -1,38 +1,6 @@ #include "Moments.h" #include "Alloc.h" 
-// construct empty instance (not zeroed) -void Moments::init(int nx_, int ny_, int nz_) -{ - nx = nx_; - ny = ny_; - nz = nz_; - rho = newArr3(double, nx, ny, nz); - Jx = newArr3(double, nx, ny, nz); - Jy = newArr3(double, nx, ny, nz); - Jz = newArr3(double, nx, ny, nz); - pXX = newArr3(double, nx, ny, nz); - pXY = newArr3(double, nx, ny, nz); - pXZ = newArr3(double, nx, ny, nz); - pYY = newArr3(double, nx, ny, nz); - pYZ = newArr3(double, nx, ny, nz); - pZZ = newArr3(double, nx, ny, nz); -} - -Moments::~Moments() { - // nodes and species - delArr3(rho, nx, ny); - delArr3(Jx, nx, ny); - delArr3(Jy, nx, ny); - delArr3(Jz, nx, ny); - delArr3(pXX, nx, ny); - delArr3(pXY, nx, ny); - delArr3(pXZ, nx, ny); - delArr3(pYY, nx, ny); - delArr3(pYZ, nx, ny); - delArr3(pZZ, nx, ny); -} - void Moments::set_to_zero() { // #pragma omp parallel for collapse(1) for (register int i = 0; i < nx; i++) diff --git a/include/Alloc.h b/include/Alloc.h index 33b70b1b..8441221e 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -14,6 +14,8 @@ For examples of use of this class, see test_arrays.cpp + + An alternative would be to use boost arrays. */ #define ALIGNMENT (64) #ifdef __INTEL_COMPILER @@ -199,14 +201,21 @@ class ArrayRef3 // } // // proposed improvements: +// - allow shifting of the base: +// - need "double shift" in each class +// - need to implement "arr3.set_bases(b1,b2,b3);" +// which calculates "shift". +// - need "const size_t b1, b2, b3;" for beginning indices +// to allow bounds checking. Should not incur run-time +// penalty, but it so then condition on CHECK_BOUNDS. 
// - methods that use parallel arithmetic for omp and vectorized code template class Arr1 { private: // data - type* const __restrict__ arr; const size_t S1; + type* const __restrict__ arr; public: ~Arr1() { } void free() { AlignedFree(arr); } @@ -277,15 +286,19 @@ class Arr2 { ALIGNED(arr); return arr[getidx(n2,n1)]; } void set(size_t n2,size_t n1, type value) { ALIGNED(arr); arr[getidx(n2,n1)] = value; } + inline Arr1fetch_Arr1(){ return Arr1(arr, S1*S2); } }; template class Arr3 { private: // data - type* const __restrict__ arr; const size_t S3,S2,S1; + type* const __restrict__ arr; public: + size_t dim1()const{return S1;} + size_t dim2()const{return S2;} + size_t dim3()const{return S3;} ~Arr3(){} void free() { AlignedFree(arr); } Arr3(size_t s3, size_t s2, size_t s1) : @@ -301,7 +314,6 @@ class Arr3 check_bounds(n3, S3); return ArrayRef2(arr, n3*S2, S2, S1); } - type* get_arr(){return arr;} inline size_t getidx(size_t n3, size_t n2, size_t n1) const { check_bounds(n3, S3); @@ -317,6 +329,7 @@ class Arr3 { ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } void set(size_t n3,size_t n2,size_t n1, type value) { ALIGNED(arr); arr[getidx(n3,n2,n1)] = value; } + inline Arr1fetch_Arr1(){ return Arr1(arr, S1*S2*S3); } }; template @@ -380,7 +393,6 @@ template struct Array3 : public Arr3 { ~Array3(){Arr3::free();} - Arr3& fast_accessor() { return *(Arr3*)this; } Array3(size_t s3, size_t s2, size_t s1) : Arr3(s3,s2,s1) { } }; @@ -406,6 +418,7 @@ typedef Arr1 doubleArr1; typedef Arr2 doubleArr2; typedef Arr3 doubleArr3; typedef Arr4 doubleArr4; +typedef ArrayRef1 doubleArrRef1; // #define newArr4(type,sz1,sz2,sz3,sz4) newArray4((sz1),(sz2),(sz3),(sz4)) #define newArr3(type,sz1,sz2,sz3) newArray3((sz1),(sz2),(sz3)) diff --git a/include/Moments.h b/include/Moments.h index cb0018aa..53fe942f 100644 --- a/include/Moments.h +++ b/include/Moments.h @@ -1,24 +1,25 @@ #ifndef Moments_H #define Moments_H +#include "Alloc.h" // class to accumulate node-centered species moments // class 
Moments { private: - double ***rho; + doubleArr3 rho; /** current density, defined on nodes */ - double ***Jx; - double ***Jy; - double ***Jz; + doubleArr3 Jx; + doubleArr3 Jy; + doubleArr3 Jz; /** pressure tensor components, defined on nodes */ - double ***pXX; - double ***pXY; - double ***pXZ; - double ***pYY; - double ***pYZ; - double ***pZZ; + doubleArr3 pXX; + doubleArr3 pXY; + doubleArr3 pXZ; + doubleArr3 pYY; + doubleArr3 pYZ; + doubleArr3 pZZ; int nx; int ny; int nz; @@ -27,35 +28,45 @@ class Moments { int get_nx() const { return nx; } int get_ny() const { return ny; } int get_nz() const { return nz; } - double get_rho(int i, int j, int k) const { return rho[i][j][k]; } - double get_Jx (int i, int j, int k) const { return Jx [i][j][k]; } - double get_Jy (int i, int j, int k) const { return Jy [i][j][k]; } - double get_Jz (int i, int j, int k) const { return Jz [i][j][k]; } - double get_pXX(int i, int j, int k) const { return pXX[i][j][k]; } - double get_pXY(int i, int j, int k) const { return pXY[i][j][k]; } - double get_pXZ(int i, int j, int k) const { return pXZ[i][j][k]; } - double get_pYY(int i, int j, int k) const { return pYY[i][j][k]; } - double get_pYZ(int i, int j, int k) const { return pYZ[i][j][k]; } - double get_pZZ(int i, int j, int k) const { return pZZ[i][j][k]; } + double get_rho(int i, int j, int k) const { return rho.get(i,j,k); } + double get_Jx (int i, int j, int k) const { return Jx .get(i,j,k); } + double get_Jy (int i, int j, int k) const { return Jy .get(i,j,k); } + double get_Jz (int i, int j, int k) const { return Jz .get(i,j,k); } + double get_pXX(int i, int j, int k) const { return pXX.get(i,j,k); } + double get_pXY(int i, int j, int k) const { return pXY.get(i,j,k); } + double get_pXZ(int i, int j, int k) const { return pXZ.get(i,j,k); } + double get_pYY(int i, int j, int k) const { return pYY.get(i,j,k); } + double get_pYZ(int i, int j, int k) const { return pYZ.get(i,j,k); } + double get_pZZ(int i, int j, int k) const { return 
pZZ.get(i,j,k); } // fetch accessors (write access) - double*** fetch_rho() { return rho; } - double*** fetch_Jx () { return Jx ; } - double*** fetch_Jy () { return Jy ; } - double*** fetch_Jz () { return Jz ; } - double*** fetch_Pxx() { return pXX; } - double*** fetch_Pxy() { return pXY; } - double*** fetch_Pxz() { return pXZ; } - double*** fetch_Pyy() { return pYY; } - double*** fetch_Pyz() { return pYZ; } - double*** fetch_Pzz() { return pZZ; } + doubleArr3& fetch_rho() { return rho; } + doubleArr3& fetch_Jx () { return Jx ; } + doubleArr3& fetch_Jy () { return Jy ; } + doubleArr3& fetch_Jz () { return Jz ; } + doubleArr3& fetch_Pxx() { return pXX; } + doubleArr3& fetch_Pxy() { return pXY; } + doubleArr3& fetch_Pxz() { return pXZ; } + doubleArr3& fetch_Pyy() { return pYY; } + doubleArr3& fetch_Pyz() { return pYZ; } + doubleArr3& fetch_Pzz() { return pZZ; } public: - Moments() { + Moments(int nxn, int nyn, int nzn) : + nx(nxn), + ny(nyn), + nz(nzn), + rho (nxn, nyn, nzn), + Jx (nxn, nyn, nzn), + Jy (nxn, nyn, nzn), + Jz (nxn, nyn, nzn), + pXX (nxn, nyn, nzn), + pXY (nxn, nyn, nzn), + pXZ (nxn, nyn, nzn), + pYY (nxn, nyn, nzn), + pYZ (nxn, nyn, nzn), + pZZ (nxn, nyn, nzn) + { }; - Moments(int nx_, int ny_, int nz_){ - init(nx_,ny_,nz_); - } - void init(int nx_, int ny_, int nz_); - ~Moments(); + ~Moments(){}; void set_to_zero(); }; From c5328555704eb519601121bc67f9ae6440a972a5 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 7 Aug 2013 13:50:29 +0200 Subject: [PATCH 027/118] iss #43: implemented array classes; fixes iss #44 (memory leak) --- ConfigFile/src/ConfigFile.cpp | 1 + PSKOutput3D/PSKhdf5adaptor.cpp | 18 +- communication/ComInterpNodes3D.cpp | 3 +- communication/ComNodes3D.cpp | 44 +- communication/ComParser3D.cpp | 1 + fields/EMfields3D.cpp | 779 +++++++++---------------- grids/Grid3DCU.cpp | 55 +- iPic3D.cpp | 1 + include/Alloc.h | 859 ++++++++++++++++++---------- include/Basic.h | 139 +++-- include/ComNodes3D.h | 27 +- include/EMfields3D.h | 453 
+++++++-------- include/Grid3DCU.h | 66 ++- include/Moments.h | 40 +- include/PSKhdf5adaptor.h | 7 +- include/TransArraySpace3D.h | 19 +- include/arraysfwd.h | 52 ++ include/phdf5.h | 5 +- inputoutput/Collective.cpp | 1 + inputoutput/WriteOutputParallel.cpp | 33 +- inputoutput/phdf5.cpp | 5 +- main/iPic3Dlib.cpp | 1 + particles/Particles3D.cpp | 64 +-- tests/Makefile | 5 +- tests/test_arrays.cpp | 107 ++-- 25 files changed, 1439 insertions(+), 1346 deletions(-) create mode 100644 include/arraysfwd.h diff --git a/ConfigFile/src/ConfigFile.cpp b/ConfigFile/src/ConfigFile.cpp index ed9f6b53..173833dd 100644 --- a/ConfigFile/src/ConfigFile.cpp +++ b/ConfigFile/src/ConfigFile.cpp @@ -2,6 +2,7 @@ #include "ConfigFile.h" #include "errors.h" +#include "debug.h" using std::string; diff --git a/PSKOutput3D/PSKhdf5adaptor.cpp b/PSKOutput3D/PSKhdf5adaptor.cpp index 8a4d2d14..f6033586 100644 --- a/PSKOutput3D/PSKhdf5adaptor.cpp +++ b/PSKOutput3D/PSKhdf5adaptor.cpp @@ -658,10 +658,10 @@ void HDF5OutputAdaptor::write(const std::string & tag, const Dimens dimens, cons //} } -void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, double ***d_array) { +void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, const_arr3_double d_array) { if (dimens.size() != 3) { eprintf("Dimens size not 3 for object %s", objname.c_str()); - //PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(double*** array)"); + //PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(const_arr3_double array)"); //throw e; } @@ -689,15 +689,15 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, delete[]d_array_p; //} //catch(PSK::Exception & e) { - // e.push("In HDF5OutputAdaptor::write(double*** array)"); + // e.push("In HDF5OutputAdaptor::write(const_arr3_double array)"); // throw e; //} } -void HDF5OutputAdaptor::write(const std::string & objname, const 
Dimens dimens, const int ns, double ****d_array) { +void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, const int ns, const_arr4_double d_array) { if (dimens.size() != 3) { eprintf("Dimens size not 3 for object %s", objname.c_str()); - //PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(double**** array)"); + //PSK::OutputException e("Dimens size not 3 for object " + objname, "HDF5OutputAdaptor::write(const_arr4_double array)"); //throw e; } @@ -724,7 +724,7 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, delete[]d_array_p; //} //catch(PSK::Exception & e) { - // e.push("In HDF5OutputAdaptor::write(double**** array)"); + // e.push("In HDF5OutputAdaptor::write(const_arr4_double array)"); // throw e; //} } @@ -752,10 +752,10 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, //} } -void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, const int ns, double ***d_array) { +void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, const int ns, const_arr3_double d_array) { if (dimens.size() != 2) { eprintf("Dimens size not 2 for object %s", objname.c_str()); - //PSK::OutputException e("Dimens size not 2 for object " + objname, "HDF5OutputAdaptor::write(double*** array)"); + //PSK::OutputException e("Dimens size not 2 for object " + objname, "HDF5OutputAdaptor::write(const_arr3_double array)"); //throw e; } @@ -770,7 +770,7 @@ void HDF5OutputAdaptor::write(const std::string & objname, const Dimens dimens, write(objname, dimens, d_array_p); delete[]d_array_p; //} catch(PSK::Exception & e) { - // e.push("In HDF5OutputAdaptor::write(double*** array)"); + // e.push("In HDF5OutputAdaptor::write(const_arr3_double array)"); // throw e; //} } diff --git a/communication/ComInterpNodes3D.cpp b/communication/ComInterpNodes3D.cpp index 1e24dd73..d04e6765 100644 --- a/communication/ComInterpNodes3D.cpp 
+++ b/communication/ComInterpNodes3D.cpp @@ -1,9 +1,10 @@ #include "ComInterpNodes3D.h" #include "ipicdefs.h" +#include "Alloc.h" /** communicate ghost cells and sum the contribution with a index indicating the number of species*/ -void communicateInterp(int nx, int ny, int nz, int ns, double ****vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateInterp(int nx, int ny, int nz, int ns, double**** vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; diff --git a/communication/ComNodes3D.cpp b/communication/ComNodes3D.cpp index 977494f4..5d6f4424 100644 --- a/communication/ComNodes3D.cpp +++ b/communication/ComNodes3D.cpp @@ -2,10 +2,12 @@ #include "ComNodes3D.h" #include "TimeTasks.h" #include "ipicdefs.h" +#include "Alloc.h" /** communicate ghost cells (FOR NODES) */ -void communicateNode(int nx, int ny, int nz, double ***vector, VirtualTopology3D * vct) { +void communicateNode(int nx, int ny, int nz, arr3_double& _vector, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -107,8 +109,9 @@ void communicateNode(int nx, int ny, int nz, double ***vector, VirtualTopology3D timeTasks.addto_communicate(); } /** communicate ghost cells (FOR NODES) */ -void communicateNodeBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateNodeBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, 
int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector = _vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -212,8 +215,9 @@ void communicateNodeBC(int nx, int ny, int nz, double ***vector, int bcFaceXrigh timeTasks.addto_communicate(); } /** communicate ghost cells (FOR NODES) with particles BC*/ -void communicateNodeBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateNodeBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -318,9 +322,10 @@ void communicateNodeBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXri } /** SPECIES: communicate ghost cells */ -void communicateNode(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D * vct) { - +void communicateNode(int nx, int ny, int nz, arr4_double& _vector, int ns, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ****vector = _vector.fetch_arr4(); + // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -422,8 +427,9 @@ void communicateNode(int nx, int ny, int nz, double ****vector, int ns, VirtualT // PARTICLES /** SPECIES: communicate ghost cells */ -void communicateNode_P(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D * vct) { +void communicateNode_P(int nx, int ny, int nz, arr4_double& _vector, int ns, 
VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ****vector = _vector.fetch_arr4(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -526,9 +532,10 @@ void communicateNode_P(int nx, int ny, int nz, double ****vector, int ns, Virtua // /** communicate ghost cells (FOR CENTERS) */ -void communicateCenter(int nx, int ny, int nz, double ***vector, VirtualTopology3D * vct) { - +void communicateCenter(int nx, int ny, int nz, arr3_double& _vector, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector = _vector.fetch_arr3(); + // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -627,8 +634,9 @@ void communicateCenter(int nx, int ny, int nz, double ***vector, VirtualTopology timeTasks.addto_communicate(); } /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ -void communicateCenterBoxStencilBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -659,8 +667,9 @@ void communicateCenterBoxStencilBC(int nx, int ny, int nz, double ***vector, int } // particles /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ -void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void 
communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -693,8 +702,9 @@ void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, double ***vector, i // -void communicateNodeBoxStencilBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -724,8 +734,9 @@ void communicateNodeBoxStencilBC(int nx, int ny, int nz, double ***vector, int b timeTasks.addto_communicate(); } -void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; double *ghostXleftFace = new double[(ny - 2) * (nz - 2)]; @@ -758,8 +769,9 @@ void 
communicateNodeBoxStencilBC_P(int nx, int ny, int nz, double ***vector, int /** SPECIES: communicate ghost cells */ -void communicateCenter(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D * vct) { +void communicateCenter(int nx, int ny, int nz, arr4_double& _vector, int ns, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ****vector=_vector.fetch_arr4(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -858,8 +870,9 @@ void communicateCenter(int nx, int ny, int nz, double ****vector, int ns, Virtua timeTasks.addto_communicate(); } // /////////// communication + BC //////////////////////////// -void communicateCenterBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateCenterBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -961,8 +974,9 @@ void communicateCenterBC(int nx, int ny, int nz, double ***vector, int bcFaceXri timeTasks.addto_communicate(); } // /////////// communication + BC //////////////////////////// -void communicateCenterBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateCenterBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); + double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 
2) * (nz - 2)]; diff --git a/communication/ComParser3D.cpp b/communication/ComParser3D.cpp index e7996e73..18e374a0 100644 --- a/communication/ComParser3D.cpp +++ b/communication/ComParser3D.cpp @@ -1,4 +1,5 @@ +#include #include "ComParser3D.h" /** swap the buffer */ diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index d430aae2..c01b149c 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -7,39 +7,137 @@ #include "ompdefs.h" /*! constructor */ -EMfields3D::EMfields3D(Collective * col, Grid * grid) { - nxc = grid->getNXC(); - nxn = grid->getNXN(); - nyc = grid->getNYC(); - nyn = grid->getNYN(); - nzc = grid->getNZC(); - nzn = grid->getNZN(); - dx = grid->getDX(); - dy = grid->getDY(); - dz = grid->getDZ(); - invVOL = grid->getInvVOL(); - xStart = grid->getXstart(); - xEnd = grid->getXend(); - yStart = grid->getYstart(); - yEnd = grid->getYend(); - zStart = grid->getZstart(); - zEnd = grid->getZend(); - Lx = col->getLx(); - Ly = col->getLy(); - Lz = col->getLz(); - ns = col->getNs(); - c = col->getC(); - dt = col->getDt(); - th = col->getTh(); - ue0 = col->getU0(0); - ve0 = col->getV0(0); - we0 = col->getW0(0); - x_center = col->getx_center(); - y_center = col->gety_center(); - z_center = col->getz_center(); - L_square = col->getL_square(); - - delt = c * th * dt; +// +// We rely on the following rule from the C++ standard, section 12.6.2.5: +// +// nonstatic data members shall be initialized in the order +// they were declared in the class definition +// +// in particular, nxc, nyc, nzc and nxn, nyn, nzn are assumed +// initialized when subsequently used. 
+// +EMfields3D::EMfields3D(Collective * col, Grid * grid) : + nxc(grid->getNXC()), + nxn(grid->getNXN()), + nyc(grid->getNYC()), + nyn(grid->getNYN()), + nzc(grid->getNZC()), + nzn(grid->getNZN()), + dx(grid->getDX()), + dy(grid->getDY()), + dz(grid->getDZ()), + invVOL(grid->getInvVOL()), + xStart(grid->getXstart()), + xEnd(grid->getXend()), + yStart(grid->getYstart()), + yEnd(grid->getYend()), + zStart(grid->getZstart()), + zEnd(grid->getZend()), + Lx(col->getLx()), + Ly(col->getLy()), + Lz(col->getLz()), + ns(col->getNs()), + c(col->getC()), + dt(col->getDt()), + th(col->getTh()), + ue0(col->getU0(0)), + ve0(col->getV0(0)), + we0(col->getW0(0)), + x_center(col->getx_center()), + y_center(col->gety_center()), + z_center(col->getz_center()), + L_square(col->getL_square()), + delt (c*th*dt), // declared after these + // + // array allocation: nodes + // + Ex (nxn, nyn, nzn), + Ey (nxn, nyn, nzn), + Ez (nxn, nyn, nzn), + Exth (nxn, nyn, nzn), + Eyth (nxn, nyn, nzn), + Ezth (nxn, nyn, nzn), + Bxn (nxn, nyn, nzn), + Byn (nxn, nyn, nzn), + Bzn (nxn, nyn, nzn), + rhon (nxn, nyn, nzn), + Jx (nxn, nyn, nzn), + Jy (nxn, nyn, nzn), + Jz (nxn, nyn, nzn), + Jxh (nxn, nyn, nzn), + Jyh (nxn, nyn, nzn), + Jzh (nxn, nyn, nzn), + // + // species-specific quantities + // + rhons (ns, nxn, nyn, nzn), + rhocs (ns, nxc, nyc, nzc), + Jxs (ns, nxn, nyn, nzn), + Jys (ns, nxn, nyn, nzn), + Jzs (ns, nxn, nyn, nzn), + pXXsn (ns, nxn, nyn, nzn), + pXYsn (ns, nxn, nyn, nzn), + pXZsn (ns, nxn, nyn, nzn), + pYYsn (ns, nxn, nyn, nzn), + pYZsn (ns, nxn, nyn, nzn), + pZZsn (ns, nxn, nyn, nzn), + + // array allocation: central points + // + PHI (nxc, nyc, nzc), + Bxc (nxc, nyc, nzc), + Byc (nxc, nyc, nzc), + Bzc (nxc, nyc, nzc), + rhoc (nxc, nyc, nzc), + rhoh (nxc, nyc, nzc), + + // temporary arrays + // + tempXC (nxc, nyc, nzc), + tempYC (nxc, nyc, nzc), + tempZC (nxc, nyc, nzc), + // + tempXN (nxn, nyn, nzn), + tempYN (nxn, nyn, nzn), + tempZN (nxn, nyn, nzn), + tempC (nxc, nyc, nzc), + tempX 
(nxn, nyn, nzn), + tempY (nxn, nyn, nzn), + tempZ (nxn, nyn, nzn), + temp2X (nxn, nyn, nzn), + temp2Y (nxn, nyn, nzn), + temp2Z (nxn, nyn, nzn), + imageX (nxn, nyn, nzn), + imageY (nxn, nyn, nzn), + imageZ (nxn, nyn, nzn), + Dx (nxn, nyn, nzn), + Dy (nxn, nyn, nzn), + Dz (nxn, nyn, nzn), + vectX (nxn, nyn, nzn), + vectY (nxn, nyn, nzn), + vectZ (nxn, nyn, nzn), + divC (nxc, nyc, nzc), + // B_ext and J_ext should not be allocated unless used. + Bx_ext(nxn,nyn,nzn), + By_ext(nxn,nyn,nzn), + Bz_ext(nxn,nyn,nzn), + Jx_ext(nxn,nyn,nzn), + Jy_ext(nxn,nyn,nzn), + Jz_ext(nxn,nyn,nzn) +{ + // External imposed fields + // + B1x = col->getB1x(); + B1y = col->getB1y(); + B1z = col->getB1z(); + if(B1x!=0. || B1y !=0. || B1z!=0.) + { + eprintf("This functionality has not yet been implemented"); + } + Bx_ext.setall(0.); + By_ext.setall(0.); + Bz_ext.setall(0.); + // PoissonCorrection = false; if (col->getPoissonCorrection()=="yes") PoissonCorrection = true; CGtol = col->getCGtol(); @@ -49,11 +147,11 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) { qom[i] = col->getQOM(i); // boundary conditions: PHI and EM fields bcPHIfaceXright = col->getBcPHIfaceXright(); - bcPHIfaceXleft = col->getBcPHIfaceXleft(); + bcPHIfaceXleft = col->getBcPHIfaceXleft(); bcPHIfaceYright = col->getBcPHIfaceYright(); - bcPHIfaceYleft = col->getBcPHIfaceYleft(); + bcPHIfaceYleft = col->getBcPHIfaceYleft(); bcPHIfaceZright = col->getBcPHIfaceZright(); - bcPHIfaceZleft = col->getBcPHIfaceZleft(); + bcPHIfaceZleft = col->getBcPHIfaceZleft(); bcEMfaceXright = col->getBcEMfaceXright(); bcEMfaceXleft = col->getBcEMfaceXleft(); @@ -65,10 +163,6 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) { B0x = col->getB0x(); B0y = col->getB0y(); B0z = col->getB0z(); - // Earth Simulation - B1x = col->getB1x(); - B1y = col->getB1y(); - B1z = col->getB1z(); delta = col->getDelta(); Smooth = col->getSmooth(); // get the density background for the gem Challange @@ -96,75 +190,6 @@ 
EMfields3D::EMfields3D(Collective * col, Grid * grid) { injFieldsFront = new injInfoFields(nxn, nyn, nzn); injFieldsRear = new injInfoFields(nxn, nyn, nzn); - // arrays allocation: nodes - Ex = newArr3(double, nxn, nyn, nzn); - Ey = newArr3(double, nxn, nyn, nzn); - Ez = newArr3(double, nxn, nyn, nzn); - Exth = newArr3(double, nxn, nyn, nzn); - Eyth = newArr3(double, nxn, nyn, nzn); - Ezth = newArr3(double, nxn, nyn, nzn); - Bxn = newArr3(double, nxn, nyn, nzn); - Byn = newArr3(double, nxn, nyn, nzn); - Bzn = newArr3(double, nxn, nyn, nzn); - rhon = newArr3(double, nxn, nyn, nzn); - Jx = newArr3(double, nxn, nyn, nzn); - Jy = newArr3(double, nxn, nyn, nzn); - Jz = newArr3(double, nxn, nyn, nzn); - Jxh = newArr3(double, nxn, nyn, nzn); - Jyh = newArr3(double, nxn, nyn, nzn); - Jzh = newArr3(double, nxn, nyn, nzn); - // External imposed fields - Bx_ext = newArr3(double,nxn,nyn,nzn); - By_ext = newArr3(double,nxn,nyn,nzn); - Bz_ext = newArr3(double,nxn,nyn,nzn); - Jx_ext = newArr3(double,nxn,nyn,nzn); - Jy_ext = newArr3(double,nxn,nyn,nzn); - Jz_ext = newArr3(double,nxn,nyn,nzn); - // involving species - rhons = newArr4(double, ns, nxn, nyn, nzn); - rhocs = newArr4(double, ns, nxc, nyc, nzc); - Jxs = newArr4(double, ns, nxn, nyn, nzn); - Jys = newArr4(double, ns, nxn, nyn, nzn); - Jzs = newArr4(double, ns, nxn, nyn, nzn); - pXXsn = newArr4(double, ns, nxn, nyn, nzn); - pXYsn = newArr4(double, ns, nxn, nyn, nzn); - pXZsn = newArr4(double, ns, nxn, nyn, nzn); - pYYsn = newArr4(double, ns, nxn, nyn, nzn); - pYZsn = newArr4(double, ns, nxn, nyn, nzn); - pZZsn = newArr4(double, ns, nxn, nyn, nzn); - // arrays allocation: central points - PHI = newArr3(double, nxc, nyc, nzc); - Bxc = newArr3(double, nxc, nyc, nzc); - Byc = newArr3(double, nxc, nyc, nzc); - Bzc = newArr3(double, nxc, nyc, nzc); - rhoc = newArr3(double, nxc, nyc, nzc); - rhoh = newArr3(double, nxc, nyc, nzc); - - // temporary arrays - tempXC = newArr3(double, nxc, nyc, nzc); - tempYC = newArr3(double, nxc, 
nyc, nzc); - tempZC = newArr3(double, nxc, nyc, nzc); - - tempXN = newArr3(double, nxn, nyn, nzn); - tempYN = newArr3(double, nxn, nyn, nzn); - tempZN = newArr3(double, nxn, nyn, nzn); - tempC = newArr3(double, nxc, nyc, nzc); - tempX = newArr3(double, nxn, nyn, nzn); - tempY = newArr3(double, nxn, nyn, nzn); - tempZ = newArr3(double, nxn, nyn, nzn); - temp2X = newArr3(double, nxn, nyn, nzn); - temp2Y = newArr3(double, nxn, nyn, nzn); - temp2Z = newArr3(double, nxn, nyn, nzn); - imageX = newArr3(double, nxn, nyn, nzn); - imageY = newArr3(double, nxn, nyn, nzn); - imageZ = newArr3(double, nxn, nyn, nzn); - Dx = newArr3(double, nxn, nyn, nzn); - Dy = newArr3(double, nxn, nyn, nzn); - Dz = newArr3(double, nxn, nyn, nzn); - vectX = newArr3(double, nxn, nyn, nzn); - vectY = newArr3(double, nxn, nyn, nzn); - vectZ = newArr3(double, nxn, nyn, nzn); - divC = newArr3(double, nxc, nyc, nzc); sizeMomentsArray = omp_thread_count(); momentsArray = new Moments*[sizeMomentsArray]; for(int i=0;igetCartesian_rank() == 0) cout << "*** E CALCULATION ***" << endl; - double ***divE = newArr3(double, nxc, nyc, nzc); - double ***gradPHIX = newArr3(double, nxn, nyn, nzn); - double ***gradPHIY = newArr3(double, nxn, nyn, nzn); - double ***gradPHIZ = newArr3(double, nxn, nyn, nzn); + array3_double divE (nxc, nyc, nzc); + array3_double gradPHIX (nxn, nyn, nzn); + array3_double gradPHIY (nxn, nyn, nzn); + array3_double gradPHIZ (nxn, nyn, nzn); double *xkrylov = new double[3 * (nxn - 2) * (nyn - 2) * (nzn - 2)]; // 3 E components double *bkrylov = new double[3 * (nxn - 2) * (nyn - 2) * (nzn - 2)]; // 3 components @@ -513,11 +538,6 @@ void EMfields3D::calculateE(Grid * grid, VirtualTopology3D * vct, Collective *co delete[]bkrylov; delete[]xkrylovPoisson; delete[]bkrylovPoisson; - delArr3(divE, nxc, nyc); - delArr3(gradPHIX, nxn, nyn); - delArr3(gradPHIY, nxn, nyn); - delArr3(gradPHIZ, nxn, nyn); - } /*! 
Calculate sorgent for Maxwell solver */ @@ -679,7 +699,7 @@ void EMfields3D::MaxwellImage(double *im, double *vector, Grid * grid, VirtualTo } /*! Calculate PI dot (vectX, vectY, vectZ) */ -void EMfields3D::PIdot(double ***PIdotX, double ***PIdotY, double ***PIdotZ, double ***vectX, double ***vectY, double ***vectZ, int ns, Grid * grid) { +void EMfields3D::PIdot(arr3_double& PIdotX, arr3_double& PIdotY, arr3_double& PIdotZ, const_arr3_double& vectX, const_arr3_double& vectY, const_arr3_double& vectZ, int ns, Grid * grid) { double beta, edotb, omcx, omcy, omcz, denom; beta = .5 * qom[ns] * dt / c; for (int i = 1; i < nxn - 1; i++) @@ -688,17 +708,17 @@ void EMfields3D::PIdot(double ***PIdotX, double ***PIdotY, double ***PIdotZ, dou omcx = beta * (Bxn[i][j][k] + Bx_ext[i][j][k]); omcy = beta * (Byn[i][j][k] + By_ext[i][j][k]); omcz = beta * (Bzn[i][j][k] + Bz_ext[i][j][k]); - edotb = vectX[i][j][k] * omcx + vectY[i][j][k] * omcy + vectZ[i][j][k] * omcz; + edotb = vectX.get(i,j,k) * omcx + vectY.get(i,j,k) * omcy + vectZ.get(i,j,k) * omcz; denom = 1 / (1.0 + omcx * omcx + omcy * omcy + omcz * omcz); - PIdotX[i][j][k] += (vectX[i][j][k] + (vectY[i][j][k] * omcz - vectZ[i][j][k] * omcy + edotb * omcx)) * denom; - PIdotY[i][j][k] += (vectY[i][j][k] + (vectZ[i][j][k] * omcx - vectX[i][j][k] * omcz + edotb * omcy)) * denom; - PIdotZ[i][j][k] += (vectZ[i][j][k] + (vectX[i][j][k] * omcy - vectY[i][j][k] * omcx + edotb * omcz)) * denom; + PIdotX.fetch(i,j,k) += (vectX.get(i,j,k) + (vectY.get(i,j,k) * omcz - vectZ.get(i,j,k) * omcy + edotb * omcx)) * denom; + PIdotY.fetch(i,j,k) += (vectY.get(i,j,k) + (vectZ.get(i,j,k) * omcx - vectX.get(i,j,k) * omcz + edotb * omcy)) * denom; + PIdotZ.fetch(i,j,k) += (vectZ.get(i,j,k) + (vectX.get(i,j,k) * omcy - vectY.get(i,j,k) * omcx + edotb * omcz)) * denom; } - - } /*! 
Calculate MU dot (vectX, vectY, vectZ) */ -void EMfields3D::MUdot(double ***MUdotX, double ***MUdotY, double ***MUdotZ, double ***vectX, double ***vectY, double ***vectZ, Grid * grid) { +void EMfields3D::MUdot(arr3_double& MUdotX, arr3_double& MUdotY, arr3_double& MUdotZ, + const_arr3_double& vectX, const_arr3_double& vectY, const_arr3_double& vectZ, Grid * grid) +{ double beta, edotb, omcx, omcy, omcz, denom; for (int i = 1; i < nxn - 1; i++) for (int j = 1; j < nyn - 1; j++) @@ -715,18 +735,16 @@ void EMfields3D::MUdot(double ***MUdotX, double ***MUdotY, double ***MUdotZ, dou omcx = beta * (Bxn[i][j][k] + Bx_ext[i][j][k]); omcy = beta * (Byn[i][j][k] + By_ext[i][j][k]); omcz = beta * (Bzn[i][j][k] + Bz_ext[i][j][k]); - edotb = vectX[i][j][k] * omcx + vectY[i][j][k] * omcy + vectZ[i][j][k] * omcz; + edotb = vectX.get(i,j,k) * omcx + vectY.get(i,j,k) * omcy + vectZ.get(i,j,k) * omcz; denom = FourPI / 2 * delt * dt / c * qom[is] * rhons[is][i][j][k] / (1.0 + omcx * omcx + omcy * omcy + omcz * omcz); - MUdotX[i][j][k] += (vectX[i][j][k] + (vectY[i][j][k] * omcz - vectZ[i][j][k] * omcy + edotb * omcx)) * denom; - MUdotY[i][j][k] += (vectY[i][j][k] + (vectZ[i][j][k] * omcx - vectX[i][j][k] * omcz + edotb * omcy)) * denom; - MUdotZ[i][j][k] += (vectZ[i][j][k] + (vectX[i][j][k] * omcy - vectY[i][j][k] * omcx + edotb * omcz)) * denom; + MUdotX.fetch(i,j,k) += (vectX.get(i,j,k) + (vectY.get(i,j,k) * omcz - vectZ.get(i,j,k) * omcy + edotb * omcx)) * denom; + MUdotY.fetch(i,j,k) += (vectY.get(i,j,k) + (vectZ.get(i,j,k) * omcx - vectX.get(i,j,k) * omcz + edotb * omcy)) * denom; + MUdotZ.fetch(i,j,k) += (vectZ.get(i,j,k) + (vectX.get(i,j,k) * omcy - vectY.get(i,j,k) * omcx + edotb * omcz)) * denom; } - } - } /* Interpolation smoothing: Smoothing (vector must already have ghost cells) TO MAKE SMOOTH value as to be different from 1.0 type = 0 --> center based vector ; type = 1 --> node based vector ; */ -void EMfields3D::smooth(double value, double ***vector, int type, Grid * 
grid, VirtualTopology3D * vct) { +void EMfields3D::smooth(double value, arr3_double& vector, int type, Grid * grid, VirtualTopology3D * vct) { int nvolte = 6; for (int icount = 1; icount < nvolte + 1; icount++) { @@ -823,7 +841,7 @@ void EMfields3D::smoothE(double value, VirtualTopology3D * vct, Collective *col) } /* SPECIES: Interpolation smoothing TO MAKE SMOOTH value as to be different from 1.0 type = 0 --> center based vector type = 1 --> node based vector */ -void EMfields3D::smooth(double value, double ****vector, int is, int type, Grid * grid, VirtualTopology3D * vct) { +void EMfields3D::smooth(double value, arr4_double& vector, int is, int type, Grid * grid, VirtualTopology3D * vct) { cout << "Smoothing for Species not implemented in 3D" << endl; } @@ -1314,8 +1332,8 @@ void EMfields3D::calculateHatFunctions(Grid * grid, VirtualTopology3D * vct) { /*! Image of Poisson Solver */ void EMfields3D::PoissonImage(double *image, double *vector, Grid * grid, VirtualTopology3D * vct) { // allocate 2 three dimensional service vectors - double ***temp = newArr3(double, nxc, nyc, nzc); - double ***im = newArr3(double, nxc, nyc, nzc); + array3_double temp(nxc, nyc, nzc); + array3_double im(nxc, nyc, nzc); eqValue(0.0, image, (nxc - 2) * (nyc - 2) * (nzc - 2)); eqValue(0.0, temp, nxc, nyc, nzc); eqValue(0.0, im, nxc, nyc, nzc); @@ -1325,9 +1343,6 @@ void EMfields3D::PoissonImage(double *image, double *vector, Grid * grid, Virtua grid->lapC2Cpoisson(im, temp, vct); // move from physical space to krylov space phys2solver(image, im, nxc, nyc, nzc); - // deallocate temporary array and objects - delArr3(temp, nxc, nyc); - delArr3(im, nxc, nyc); } /*! 
interpolate charge density and pressure density from node to center */ void EMfields3D::interpDensitiesN2C(VirtualTopology3D * vct, Grid * grid) { @@ -1339,25 +1354,25 @@ void EMfields3D::communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, // interpolate adding common nodes among processors timeTasks.start_communicate(); - communicateInterp(nxn, nyn, nzn, ns, rhons, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, Jxs, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, Jys, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, Jzs, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, pXXsn, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, pXYsn, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, pXZsn, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, pYYsn, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, pYZsn, 0, 0, 0, 0, 0, 0, vct); - communicateInterp(nxn, nyn, nzn, ns, pZZsn, 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, rhons.fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, Jxs .fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, Jys .fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, Jzs .fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, pXXsn.fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, pXYsn.fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, pXZsn.fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, pYYsn.fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, pYZsn.fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); + communicateInterp(nxn, nyn, nzn, ns, pZZsn.fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); // calculate the correct densities on the boundaries adjustNonPeriodicDensities(ns, vct); // put the correct values on ghost cells 
timeTasks.addto_communicate(); communicateNode_P(nxn, nyn, nzn, rhons, ns, vct); - communicateNode_P(nxn, nyn, nzn, Jxs, ns, vct); - communicateNode_P(nxn, nyn, nzn, Jys, ns, vct); - communicateNode_P(nxn, nyn, nzn, Jzs, ns, vct); + communicateNode_P(nxn, nyn, nzn, Jxs , ns, vct); + communicateNode_P(nxn, nyn, nzn, Jys , ns, vct); + communicateNode_P(nxn, nyn, nzn, Jzs , ns, vct); communicateNode_P(nxn, nyn, nzn, pXXsn, ns, vct); communicateNode_P(nxn, nyn, nzn, pXYsn, ns, vct); communicateNode_P(nxn, nyn, nzn, pXZsn, ns, vct); @@ -2473,7 +2488,10 @@ void EMfields3D::sustensorRightZ(double **susxz, double **susyz, double **suszz) } /*! Perfect conductor boundary conditions: LEFT wall */ -void EMfields3D::perfectConductorLeft(double ***imageX, double ***imageY, double ***imageZ, double ***vectorX, double ***vectorY, double ***vectorZ, int dir, Grid * grid) { +void EMfields3D::perfectConductorLeft(arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, + const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, + int dir, Grid * grid) +{ double** susxy; double** susyy; double** suszy; @@ -2491,9 +2509,9 @@ void EMfields3D::perfectConductorLeft(double ***imageX, double ***imageY, double sustensorLeftX(susxx, susyx, suszx); for (int i=1; i < nyn-1;i++) for (int j=1; j < nzn-1;j++){ - imageX[1][i][j] = vectorX[1][i][j] - (Ex[1][i][j] - susyx[i][j]*vectorY[1][i][j] - suszx[i][j]*vectorZ[1][i][j] - Jxh[1][i][j]*dt*th*FourPI)/susxx[i][j]; - imageY[1][i][j] = vectorY[1][i][j] - 0.0*vectorY[2][i][j]; - imageZ[1][i][j] = vectorZ[1][i][j] - 0.0*vectorZ[2][i][j]; + imageX[1][i][j] = vectorX.get(1,i,j) - (Ex[1][i][j] - susyx[i][j]*vectorY.get(1,i,j) - suszx[i][j]*vectorZ.get(1,i,j) - Jxh[1][i][j]*dt*th*FourPI)/susxx[i][j]; + imageY[1][i][j] = vectorY.get(1,i,j) - 0.0*vectorY.get(2,i,j); + imageZ[1][i][j] = vectorZ.get(1,i,j) - 0.0*vectorZ.get(2,i,j); } delArr2(susxx,nxn); delArr2(susyx,nxn); @@ -2506,9 +2524,9 @@ void 
EMfields3D::perfectConductorLeft(double ***imageX, double ***imageY, double sustensorLeftY(susxy, susyy, suszy); for (int i=1; i < nxn-1;i++) for (int j=1; j < nzn-1;j++){ - imageX[i][1][j] = vectorX[i][1][j] - 0.0*vectorX[i][2][j]; - imageY[i][1][j] = vectorY[i][1][j] - (Ey[i][1][j] - susxy[i][j]*vectorX[i][1][j] - suszy[i][j]*vectorZ[i][1][j] - Jyh[i][1][j]*dt*th*FourPI)/susyy[i][j]; - imageZ[i][1][j] = vectorZ[i][1][j] - 0.0*vectorZ[i][2][j]; + imageX[i][1][j] = vectorX.get(i,1,j) - 0.0*vectorX.get(i,2,j); + imageY[i][1][j] = vectorY.get(i,1,j) - (Ey[i][1][j] - susxy[i][j]*vectorX.get(i,1,j) - suszy[i][j]*vectorZ.get(i,1,j) - Jyh[i][1][j]*dt*th*FourPI)/susyy[i][j]; + imageZ[i][1][j] = vectorZ.get(i,1,j) - 0.0*vectorZ.get(i,2,j); } delArr2(susxy,nxn); delArr2(susyy,nxn); @@ -2521,9 +2539,9 @@ void EMfields3D::perfectConductorLeft(double ***imageX, double ***imageY, double sustensorLeftZ(susxy, susyy, suszy); for (int i=1; i < nxn-1;i++) for (int j=1; j < nyn-1;j++){ - imageX[i][j][1] = vectorX[i][j][1]; - imageY[i][j][1] = vectorX[i][j][1]; - imageZ[i][j][1] = vectorZ[i][j][1] - (Ez[i][j][1] - susxz[i][j]*vectorX[i][j][1] - susyz[i][j]*vectorY[i][j][1] - Jzh[i][j][1]*dt*th*FourPI)/suszz[i][j]; + imageX[i][j][1] = vectorX.get(i,j,1); + imageY[i][j][1] = vectorX.get(i,j,1); + imageZ[i][j][1] = vectorZ.get(i,j,1) - (Ez[i][j][1] - susxz[i][j]*vectorX.get(i,j,1) - susyz[i][j]*vectorY.get(i,j,1) - Jzh[i][j][1]*dt*th*FourPI)/suszz[i][j]; } delArr2(susxz,nxn); delArr2(susyz,nxn); @@ -2533,7 +2551,13 @@ void EMfields3D::perfectConductorLeft(double ***imageX, double ***imageY, double } /*! 
Perfect conductor boundary conditions: RIGHT wall */ -void EMfields3D::perfectConductorRight(double ***imageX, double ***imageY, double ***imageZ, double ***vectorX, double ***vectorY, double ***vectorZ, int dir, Grid * grid) { +void EMfields3D::perfectConductorRight( + arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, + const_arr3_double& vectorX, + const_arr3_double& vectorY, + const_arr3_double& vectorZ, + int dir, Grid * grid) +{ double beta, omcx, omcy, omcz, denom; double** susxy; double** susyy; @@ -2552,9 +2576,9 @@ void EMfields3D::perfectConductorRight(double ***imageX, double ***imageY, doubl sustensorRightX(susxx, susyx, suszx); for (int i=1; i < nyn-1;i++) for (int j=1; j < nzn-1;j++){ - imageX[nxn-2][i][j] = vectorX[nxn-2][i][j] - (Ex[nxn-2][i][j] - susyx[i][j]*vectorY[nxn-2][i][j] - suszx[i][j]*vectorZ[nxn-2][i][j] - Jxh[nxn-2][i][j]*dt*th*FourPI)/susxx[i][j]; - imageY[nxn-2][i][j] = vectorY[nxn-2][i][j] - 0.0 * vectorY[nxn-3][i][j]; - imageZ[nxn-2][i][j] = vectorZ[nxn-2][i][j] - 0.0 * vectorZ[nxn-3][i][j]; + imageX[nxn-2][i][j] = vectorX.get(nxn-2,i,j) - (Ex[nxn-2][i][j] - susyx[i][j]*vectorY.get(nxn-2,i,j) - suszx[i][j]*vectorZ.get(nxn-2,i,j) - Jxh[nxn-2][i][j]*dt*th*FourPI)/susxx[i][j]; + imageY[nxn-2][i][j] = vectorY.get(nxn-2,i,j) - 0.0 * vectorY.get(nxn-3,i,j); + imageZ[nxn-2][i][j] = vectorZ.get(nxn-2,i,j) - 0.0 * vectorZ.get(nxn-3,i,j); } delArr2(susxx,nxn); delArr2(susyx,nxn); @@ -2567,9 +2591,9 @@ void EMfields3D::perfectConductorRight(double ***imageX, double ***imageY, doubl sustensorRightY(susxy, susyy, suszy); for (int i=1; i < nxn-1;i++) for (int j=1; j < nzn-1;j++){ - imageX[i][nyn-2][j] = vectorX[i][nyn-2][j] - 0.0*vectorX[i][nyn-3][j]; - imageY[i][nyn-2][j] = vectorY[i][nyn-2][j] - (Ey[i][nyn-2][j] - susxy[i][j]*vectorX[i][nyn-2][j] - suszy[i][j]*vectorZ[i][nyn-2][j] - Jyh[i][nyn-2][j]*dt*th*FourPI)/susyy[i][j]; - imageZ[i][nyn-2][j] = vectorZ[i][nyn-2][j] - 0.0*vectorZ[i][nyn-3][j]; + imageX[i][nyn-2][j] = 
vectorX.get(i,nyn-2,j) - 0.0*vectorX.get(i,nyn-3,j); + imageY[i][nyn-2][j] = vectorY.get(i,nyn-2,j) - (Ey[i][nyn-2][j] - susxy[i][j]*vectorX.get(i,nyn-2,j) - suszy[i][j]*vectorZ.get(i,nyn-2,j) - Jyh[i][nyn-2][j]*dt*th*FourPI)/susyy[i][j]; + imageZ[i][nyn-2][j] = vectorZ.get(i,nyn-2,j) - 0.0*vectorZ.get(i,nyn-3,j); } delArr2(susxy,nxn); delArr2(susyy,nxn); @@ -2582,9 +2606,9 @@ void EMfields3D::perfectConductorRight(double ***imageX, double ***imageY, doubl sustensorRightZ(susxz, susyz, suszz); for (int i=1; i < nxn-1;i++) for (int j=1; j < nyn-1;j++){ - imageX[i][j][nzn-2] = vectorX[i][j][nzn-2]; - imageY[i][j][nzn-2] = vectorY[i][j][nzn-2]; - imageZ[i][j][nzn-2] = vectorZ[i][j][nzn-2] - (Ez[i][j][nzn-2] - susxz[i][j]*vectorX[i][j][nzn-2] - susyz[i][j]*vectorY[i][j][nzn-2] - Jzh[i][j][nzn-2]*dt*th*FourPI)/suszz[i][j]; + imageX[i][j][nzn-2] = vectorX.get(i,j,nzn-2); + imageY[i][j][nzn-2] = vectorY.get(i,j,nzn-2); + imageZ[i][j][nzn-2] = vectorZ.get(i,j,nzn-2) - (Ez[i][j][nzn-2] - susxz[i][j]*vectorX.get(i,j,nzn-2) - susyz[i][j]*vectorY.get(i,j,nzn-2) - Jzh[i][j][nzn-2]*dt*th*FourPI)/suszz[i][j]; } delArr2(susxz,nxn); delArr2(susyz,nxn); @@ -2594,7 +2618,7 @@ void EMfields3D::perfectConductorRight(double ***imageX, double ***imageY, doubl } /*! Perfect conductor boundary conditions for source: LEFT WALL */ -void EMfields3D::perfectConductorLeftS(double ***vectorX, double ***vectorY, double ***vectorZ, int dir) { +void EMfields3D::perfectConductorLeftS(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, int dir) { double ebc[3]; @@ -2640,7 +2664,7 @@ void EMfields3D::perfectConductorLeftS(double ***vectorX, double ***vectorY, dou } /*! 
Perfect conductor boundary conditions for source: RIGHT WALL */ -void EMfields3D::perfectConductorRightS(double ***vectorX, double ***vectorY, double ***vectorZ, int dir) { +void EMfields3D::perfectConductorRightS(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, int dir) { double ebc[3]; @@ -2805,7 +2829,10 @@ void EMfields3D::updateInfoFields(Grid *grid,VirtualTopology3D *vct,Collective * } -void EMfields3D::BoundaryConditionsEImage(double ***imageX, double ***imageY, double ***imageZ,double ***vectorX, double ***vectorY, double ***vectorZ,int nx, int ny, int nz, VirtualTopology3D *vct,Grid *grid){ +void EMfields3D::BoundaryConditionsEImage(arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, + const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, + int nx, int ny, int nz, VirtualTopology3D *vct,Grid *grid) +{ if(vct->getXleft_neighbor()==MPI_PROC_NULL && bcEMfaceXleft == 2) { for (int j=1; j < ny-1;j++) @@ -2865,7 +2892,7 @@ void EMfields3D::BoundaryConditionsEImage(double ***imageX, double ***imageY, do } -void EMfields3D::BoundaryConditionsB(double ***vectorX, double ***vectorY, double ***vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct){ +void EMfields3D::BoundaryConditionsB(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct){ if(vct->getXleft_neighbor()==MPI_PROC_NULL && bcEMfaceXleft ==2) { for (int j=0; j < ny;j++) @@ -2948,7 +2975,7 @@ void EMfields3D::BoundaryConditionsB(double ***vectorX, double ***vectorY, doubl } -void EMfields3D::BoundaryConditionsE(double ***vectorX, double ***vectorY, double ***vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct){ +void EMfields3D::BoundaryConditionsE(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct){ if(vct->getXleft_neighbor()==MPI_PROC_NULL && bcEMfaceXleft ==2) { for (int j=0; j < 
ny;j++) @@ -3030,338 +3057,105 @@ void EMfields3D::BoundaryConditionsE(double ***vectorX, double ***vectorY, doubl } } -/*! get Potential array ** */ -double ***EMfields3D::getPHI() { - return (PHI); -} -/*! get Ex(X,Y,Z) */ -double &EMfields3D::getEx(int indexX, int indexY, int indexZ) const { - return (Ex[indexX][indexY][indexZ]); -} -/*! get Electric field component X array */ -double ***EMfields3D::getEx() { - return (Ex); -} /*! get Electric Field component X array cell without the ghost cells */ -double ***EMfields3D::getExc(Grid3DCU *grid) { - double ***arr; - double ***tmp; - - arr = newArr3(double,nxc-2,nyc-2,nzc-2); - tmp = newArr3(double,nxc,nyc,nzc); +void EMfields3D::getExc(arr3_double& arr, Grid3DCU *grid) { + array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ex); for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=tmp[i][j][k]; - - delArr3(tmp,nxc,nyc); - return arr; -} -/*! get Ey(X,Y,Z) */ -double &EMfields3D::getEy(int indexX, int indexY, int indexZ) const { - return (Ey[indexX][indexY][indexZ]); -} -/*! get Electric field component Y array */ -double ***EMfields3D::getEy() { - return (Ey); } /*! get Electric Field component Y array cell without the ghost cells */ -double ***EMfields3D::getEyc(Grid3DCU *grid) { - double ***arr; - double ***tmp; - - arr = newArr3(double,nxc-2,nyc-2,nzc-2); - tmp = newArr3(double,nxc,nyc,nzc); +void EMfields3D::getEyc(arr3_double& arr, Grid3DCU *grid) { + array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ey); for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=tmp[i][j][k]; - - delArr3(tmp,nxc,nyc); - return arr; -} -/*! get Ez(X,Y,Z) */ -double &EMfields3D::getEz(int indexX, int indexY, int indexZ) const { - return (Ez[indexX][indexY][indexZ]); -} -/*! get Electric field component Z array */ -double ***EMfields3D::getEz() { - return (Ez); } /*! 
get Electric Field component Z array cell without the ghost cells */ -double ***EMfields3D::getEzc(Grid3DCU *grid) { - double ***arr; - double ***tmp; - - arr = newArr3(double,nxc-2,nyc-2,nzc-2); - tmp = newArr3(double,nxc,nyc,nzc); +void EMfields3D::getEzc(arr3_double& arr, Grid3DCU *grid) { + array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ez); for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=tmp[i][j][k]; - - delArr3(tmp,nxc,nyc); - return arr; -} -/*! get Bx(X,Y,Z) */ -double &EMfields3D::getBx(int indexX, int indexY, int indexZ) const { - return (Bxn[indexX][indexY][indexZ]); -} -/*! get Magnetic Field component X array */ -double ***EMfields3D::getBx() { - return (Bxn); } /*! get Magnetic Field component X array cell without the ghost cells */ -double ***EMfields3D::getBxc() { - double ***arr; - arr = newArr3(double,nxc-2,nyc-2,nzc-2); +void EMfields3D::getBxc(arr3_double& arr) { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=Bxc[i][j][k]; - return arr; -} -/*! get By(X,Y,Z) */ -double &EMfields3D::getBy(int indexX, int indexY, int indexZ) const { - return (Byn[indexX][indexY][indexZ]); -} -/*! get Magnetic Field component Y array */ -double ***EMfields3D::getBy() { - return (Byn); } /*! get Magnetic Field component Y array cell without the ghost cells */ -double ***EMfields3D::getByc() { - double ***arr; - arr = newArr3(double,nxc-2,nyc-2,nzc-2); +void EMfields3D::getByc(arr3_double& arr) { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=Byc[i][j][k]; - return arr; -} -/*! get Bz(X,Y,Z) */ -double &EMfields3D::getBz(int indexX, int indexY, int indexZ) const { - return (Bzn[indexX][indexY][indexZ]); -} -/*! get Magnetic Field component Z array */ -double ***EMfields3D::getBz() { - return (Bzn); } /*! 
get Magnetic Field component Z array cell without the ghost cells */ -double ***EMfields3D::getBzc() { - double ***arr; - arr = newArr3(double,nxc-2,nyc-2,nzc-2); +void EMfields3D::getBzc(arr3_double& arr) { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=Bzc[i][j][k]; - return arr; -} -/*! get rhoc(X,Y,Z) */ -double &EMfields3D::getRHOc(int indexX, int indexY, int indexZ) const { - return (rhoc[indexX][indexY][indexZ]); -} double ***EMfields3D::getRHOc() { - return (rhoc); -} -/*! get density on node(indexX,indexY,indexZ) */ -double &EMfields3D::getRHOn(int indexX, int indexY, int indexZ) const { - return (rhon[indexX][indexY][indexZ]); -} -/*! get density array defined on nodes */ -double ***EMfields3D::getRHOn() { - return (rhon); -} -/*! get rhos(X,Y,Z) : density for species */ -double &EMfields3D::getRHOns(int indexX, int indexY, int indexZ, int is) const { - return (rhons[is][indexX][indexY][indexZ]); -} -/*! SPECIES: get density array defined on center cells */ -double &EMfields3D::getRHOcs(int indexX, int indexY, int indexZ, int is) const { - return (rhocs[is][indexX][indexY][indexZ]); -} -/*! get density array defined on nodes */ -double ****EMfields3D::getRHOns() { - return (rhons); } /*! get species density component X array cell without the ghost cells */ -double ***EMfields3D::getRHOcs(Grid3DCU *grid, int is) { - double ***arr; - double ****tmp; - - arr = newArr3(double,nxc-2,nyc-2,nzc-2); - tmp = newArr4(double,ns,nxc,nyc,nzc); +void EMfields3D::getRHOcs(arr3_double& arr, Grid3DCU *grid, int is) { + array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, rhons); for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=tmp[is][i][j][k]; - - delArr4(tmp,nxc,nyc,nzc); - return arr; -} - -/*! 
get Bx_ext(X,Y,Z) */ -double &EMfields3D::getBx_ext(int indexX, int indexY, int indexZ) const{ - return(Bx_ext[indexX][indexY][indexZ]); -} -/*! get By_ext(X,Y,Z) */ -double &EMfields3D::getBy_ext(int indexX, int indexY, int indexZ) const{ - return(By_ext[indexX][indexY][indexZ]); -} -/*! get Bz_ext(X,Y,Z) */ -double &EMfields3D::getBz_ext(int indexX, int indexY, int indexZ) const{ - return(Bz_ext[indexX][indexY][indexZ]); } -/*! get Bx_ext */ -double ***EMfields3D::getBx_ext() { - return(Bx_ext); -} -/*! get By_ext */ -double ***EMfields3D::getBy_ext() { - return(By_ext); -} -/*! get Bz_ext */ -double ***EMfields3D::getBz_ext() { - return(Bz_ext); -} - -/*! SPECIES: get pressure tensor component XX defined on nodes */ -double ****EMfields3D::getpXXsn() { - return (pXXsn); -} -/*! SPECIES: get pressure tensor component XY defined on nodes */ -double ****EMfields3D::getpXYsn() { - return (pXYsn); -} -/*! SPECIES: get pressure tensor component XZ defined on nodes */ -double ****EMfields3D::getpXZsn() { - return (pXZsn); -} -/*! SPECIES: get pressure tensor component YY defined on nodes */ -double ****EMfields3D::getpYYsn() { - return (pYYsn); -} -/*! SPECIES: get pressure tensor component YZ defined on nodes */ -double ****EMfields3D::getpYZsn() { - return (pYZsn); -} -/*! SPECIES: get pressure tensor component ZZ defined on nodes */ -double ****EMfields3D::getpZZsn() { - return (pZZsn); -} -/*! get current -Direction X */ -double &EMfields3D::getJx(int indexX, int indexY, int indexZ) const { - return (Jx[indexX][indexY][indexZ]); -} -/*! get current array X component * */ -double ***EMfields3D::getJx() { - return (Jx); -} -/*! get current -Direction Y */ -double &EMfields3D::getJy(int indexX, int indexY, int indexZ) const { - return (Jy[indexX][indexY][indexZ]); -} -/*! get current array Y component * */ -double ***EMfields3D::getJy() { - return (Jy); -} -/*! 
get current -Direction Z */ -double &EMfields3D::getJz(int indexX, int indexY, int indexZ) const { - return (Jz[indexX][indexY][indexZ]); -} -/*! get current array Z component * */ -double ***EMfields3D::getJz() { - return (Jz); -} -/*!SPECIES: get current array X component */ -double ****EMfields3D::getJxs() { - return (Jxs); -} -/*! get Jxs(X,Y,Z,is) : density for species */ -double &EMfields3D::getJxs(int indexX, int indexY, int indexZ, int is) const { - return (Jxs[is][indexX][indexY][indexZ]); -} /*! get Magnetic Field component X array species is cell without the ghost cells */ -double ***EMfields3D::getJxsc(Grid3DCU *grid, int is) { - double ***arr; - double ****tmp; - - arr = newArr3(double,nxc-2,nyc-2,nzc-2); - tmp = newArr4(double,ns,nxc,nyc,nzc); +void EMfields3D::getJxsc(arr3_double& arr, Grid3DCU *grid, int is) { + array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jxs); for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=tmp[is][i][j][k]; - - delArr4(tmp,nxc,nyc,nzc); - return arr; -} -/*! SPECIES: get current array Y component */ -double ****EMfields3D::getJys() { - return (Jys); } -/*! get Jxs(X,Y,Z,is) : density for species */ -double &EMfields3D::getJys(int indexX, int indexY, int indexZ, int is) const { - return (Jys[is][indexX][indexY][indexZ]); -} -/*! get current component Y array species is cell without the ghost cells */ -double ***EMfields3D::getJysc(Grid3DCU *grid, int is) { - double ***arr; - double ****tmp; - arr = newArr3(double,nxc-2,nyc-2,nzc-2); - tmp = newArr4(double,ns,nxc,nyc,nzc); +/*! 
get current component Y array species is cell without the ghost cells */ +void EMfields3D::getJysc(arr3_double& arr, Grid3DCU *grid, int is) { + array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jys); for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=tmp[is][i][j][k]; - - delArr4(tmp,nxc,nyc,nzc); - return arr; -} -/*!SPECIES: get current array Z component */ -double ****EMfields3D::getJzs() { - return (Jzs); -} -/*! get Jxs(X,Y,Z,is) : density for species */ -double &EMfields3D::getJzs(int indexX, int indexY, int indexZ, int is) const { - return (Jzs[is][indexX][indexY][indexZ]); } /*! get current component Z array species is cell without the ghost cells */ -double ***EMfields3D::getJzsc(Grid3DCU *grid, int is) { - double ***arr; - double ****tmp; - - arr = newArr3(double,nxc-2,nyc-2,nzc-2); - tmp = newArr4(double,ns,nxc,nyc,nzc); +void EMfields3D::getJzsc(arr3_double& arr, Grid3DCU *grid, int is) { + array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jzs); for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=tmp[is][i][j][k]; - - delArr4(tmp,nxc,nyc,nzc); - return arr; } /*! get the electric field energy */ double EMfields3D::getEenergy(void) { @@ -3401,67 +3195,16 @@ double EMfields3D::getBenergy(void) { void EMfields3D::print(void) const { } -/*! destructor: deallocate arrays */ +/*! 
destructor*/ EMfields3D::~EMfields3D() { - // nodes - delArr3(Ex, nxn, nyn); - delArr3(Ey, nxn, nyn); - delArr3(Ez, nxn, nyn); - delArr3(Exth, nxn, nyn); - delArr3(Eyth, nxn, nyn); - delArr3(Ezth, nxn, nyn); - delArr3(Bxn, nxn, nyn); - delArr3(Byn, nxn, nyn); - delArr3(Bzn, nxn, nyn); - delArr3(rhon, nxn, nyn); - delArr3(Jx, nxn, nyn); - delArr3(Jy, nxn, nyn); - delArr3(Jz, nxn, nyn); - delArr3(Jxh, nxn, nyn); - delArr3(Jyh, nxn, nyn); - delArr3(Jzh, nxn, nyn); - // nodes and species - delArr4(rhons, ns, nxn, nyn); - delArr4(Jxs, ns, nxn, nyn); - delArr4(Jys, ns, nxn, nyn); - delArr4(Jzs, ns, nxn, nyn); - delArr4(pXXsn, ns, nxn, nyn); - delArr4(pXYsn, ns, nxn, nyn); - delArr4(pXZsn, ns, nxn, nyn); - delArr4(pYYsn, ns, nxn, nyn); - delArr4(pYZsn, ns, nxn, nyn); - delArr4(pZZsn, ns, nxn, nyn); - // central points - delArr3(PHI, nxc, nyc); - delArr3(Bxc, nxc, nyc); - delArr3(Byc, nxc, nyc); - delArr3(Bzc, nxc, nyc); - delArr3(rhoc, nxc, nyc); - delArr3(rhoh, nxc, nyc); - // various stuff needs to be deallocated too - delArr3(tempXC, nxc, nyc); - delArr3(tempYC, nxc, nyc); - delArr3(tempZC, nxc, nyc); - delArr3(tempXN, nxn, nyn); - delArr3(tempYN, nxn, nyn); - delArr3(tempZN, nxn, nyn); - delArr3(tempC, nxc, nyc); - delArr3(tempX, nxn, nyn); - delArr3(tempY, nxn, nyn); - delArr3(tempZ, nxn, nyn); - delArr3(temp2X, nxn, nyn); - delArr3(temp2Y, nxn, nyn); - delArr3(temp2Z, nxn, nyn); - delArr3(imageX, nxn, nyn); - delArr3(imageY, nxn, nyn); - delArr3(imageZ, nxn, nyn); - delArr3(Dx, nxn, nyn); - delArr3(Dy, nxn, nyn); - delArr3(Dz, nxn, nyn); - delArr3(vectX, nxn, nyn); - delArr3(vectY, nxn, nyn); - delArr3(vectZ, nxn, nyn); - delArr3(divC, nxc, nyc); + delete [] qom; + delete [] rhoINIT; + delete injFieldsLeft; + delete injFieldsRight; + delete injFieldsTop; + delete injFieldsBottom; + delete injFieldsFront; + delete injFieldsRear; for(int i=0;igetYleft_neighbor() == MPI_PROC_NULL) { @@ -268,15 +266,10 @@ void Grid3DCU::lapC2C(double ***lapC, double ***scFieldC, 
VirtualTopology3D * vc } } divN2C(lapC, gradXN, gradYN, gradZN); - - delArr3(gradXN, nxn, nyn); - delArr3(gradYN, nxn, nyn); - delArr3(gradZN, nxn, nyn); - } /** calculate laplacian on central points, given a scalar field defined on central points for Poisson */ -void Grid3DCU::lapC2Cpoisson(double ***lapC, double ***scFieldC, VirtualTopology3D * vct) { +void Grid3DCU::lapC2Cpoisson(arr3_double& lapC, arr3_double& scFieldC, VirtualTopology3D * vct) { // communicate first the scFieldC communicateCenterBoxStencilBC(nxc, nyc, nzc, scFieldC, 1, 1, 1, 1, 1, 1, vct); for (register int i = 1; i < nxc - 1; i++) @@ -286,7 +279,7 @@ void Grid3DCU::lapC2Cpoisson(double ***lapC, double ***scFieldC, VirtualTopology } /** calculate divergence on boundaries */ -void Grid3DCU::divBCleft(double ***divBC, double ***vectorX, double ***vectorY, double ***vectorZ, int leftActiveNode, int dirDER) { +void Grid3DCU::divBCleft(arr3_double& divBC, const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, int leftActiveNode, int dirDER) { double compX, compY, compZ; switch (dirDER) { case 0: // DIVERGENCE DIRECTION X @@ -323,7 +316,7 @@ void Grid3DCU::divBCleft(double ***divBC, double ***vectorX, double ***vectorY, } /** calculate divergence on boundaries */ -void Grid3DCU::divBCright(double ***divBC, double ***vectorX, double ***vectorY, double ***vectorZ, int rightActiveNode, int dirDER) { +void Grid3DCU::divBCright(arr3_double& divBC, const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, int rightActiveNode, int dirDER) { double compX, compY, compZ; @@ -362,7 +355,7 @@ void Grid3DCU::divBCright(double ***divBC, double ***vectorX, double ***vectorY, } /** calculate derivative on left boundary */ -void Grid3DCU::derBC(double ***derBC, double ***vector, int leftActiveNode, int dirDER) { +void Grid3DCU::derBC(arr3_double& derBC, const_arr3_double& vector, int leftActiveNode, int dirDER) { switch (dirDER) { case 0: // DERIVATIVE DIRECTION 
X for (register int j = 1; j < nyc - 1; j++) @@ -385,7 +378,7 @@ void Grid3DCU::derBC(double ***derBC, double ***vector, int leftActiveNode, int } /** interpolate on nodes from central points: do this for the magnetic field*/ -void Grid3DCU::interpC2N(double ***vecFieldN, double ***vecFieldC) { +void Grid3DCU::interpC2N(arr3_double& vecFieldN, const_arr3_double& vecFieldC) { for (register int i = 1; i < nxn - 1; i++) for (register int j = 1; j < nyn - 1; j++) for (register int k = 1; k < nzn - 1; k++) @@ -393,7 +386,7 @@ void Grid3DCU::interpC2N(double ***vecFieldN, double ***vecFieldC) { } /** interpolate on central points from nodes */ -void Grid3DCU::interpN2C(double ***vecFieldC, double ***vecFieldN) { +void Grid3DCU::interpN2C(arr3_double& vecFieldC, const_arr3_double& vecFieldN) { for (register int i = 1; i < nxc - 1; i++) for (register int j = 1; j < nyc - 1; j++) for (register int k = 1; k < nzc - 1; k++) @@ -401,7 +394,7 @@ void Grid3DCU::interpN2C(double ***vecFieldC, double ***vecFieldN) { } /** interpolate on central points from nodes */ -void Grid3DCU::interpN2C(double ****vecFieldC, int ns, double ****vecFieldN) { +void Grid3DCU::interpN2C(arr4_double& vecFieldC, int ns, const_arr4_double& vecFieldN) { for (register int i = 1; i < nxc - 1; i++) for (register int j = 1; j < nyc - 1; j++) for (register int k = 1; k < nzc - 1; k++) diff --git a/iPic3D.cpp b/iPic3D.cpp index d670264c..ec9b59ba 100644 --- a/iPic3D.cpp +++ b/iPic3D.cpp @@ -2,6 +2,7 @@ #include #include #include "iPic3D.h" +#include "debug.h" using namespace iPic3D; diff --git a/include/Alloc.h b/include/Alloc.h index 8441221e..5c3c5b5d 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -2,6 +2,8 @@ #define IPIC_ALLOC_H #include // for alignment stuff #include "asserts.h" // for assert_le, assert_lt +//#include "errors.h" // for assert_le, assert_lt +#include "arraysfwd.h" //#include "arrays.h" // fixed-dimension arrays /* @@ -15,7 +17,47 @@ For examples of use of this class, see 
test_arrays.cpp - An alternative would be to use boost arrays. + Compiler options: + -DCHECK_BOUNDS: check bounds when performing array access + (major performance penalty). + -DFLAT_ARRAYS: use calculated 1d subscript to dereference + even for arr[i][j][k] notation. + -DCHAINED_ARRAYS: use hierarchy of pointers to dereference + even for arr.get(i,j,k) notation. + + By default, chained pointers are used for arr[i][j][k] + notation (unless -DCHECK_BOUNDS is turned on, in which case + we don't care about performance anyway), and calculated 1d + subscript is used for arr.get(i,j,k) notation. + + An alternative would have been use boost arrays. Use of our + own array class allows flexibility for our choice of array + implementation, including the possibility of using boost + for the implementation, while avoiding boost as an external + dependency. On some systems, it may be preferable to use + native arrays with hard-coded dimensions; this could suit us + well, since all arrays are approximately the same size, but + would require a recompile when changing the maximum array size. + + Rather than using these templates directly, the typedefs + declared in "arraysfwd.h" should be used: + + * const_arr3_double = const_array_ref3 + * arr3_double = array_ref3 + * array3_double = array3 + + The point is that we do not want to hard-code the fact that + we are using templates, and we may well wish to eliminate use + of templates in the future. (Alternatives are to use the + preprocessor or to have separate implementations for each + type (double, int, possibly float) if we go to use of mixed + precision). 
Support for templates is notoriously buggy in + compilers, particularly when it comes to inheritance, and I + in fact had to eliminate inheriting from the base_arr class + and use the "protected" hack below in order to get this + code to compile on the latest intel compiler (2013) and on + g++ 4.0 (2005); g++ 4.2 (2007) compiled (but unfortunately, + for my g++ 4.2, iPic3D suffered from stack frame corruption.) */ #define ALIGNMENT (64) #ifdef __INTEL_COMPILER @@ -85,6 +127,48 @@ inline type **** newArray4(size_t sz1, size_t sz2, size_t sz3, size_t sz4) return arr; } +// build chained pointer hierarchy for pre-existing bottom level +// +template +inline type **** newArray4(type * in, size_t sz1, size_t sz2, size_t sz3, size_t sz4) +{ + type****arr = newArray3(sz1,sz2,sz3); + type**arr2 = **arr; + type *ptr = in; + size_t szarr2 = sz1*sz2*sz3; + for(size_t i=0;i +inline type *** newArray3(type * in, size_t sz1, size_t sz2, size_t sz3) +{ + type***arr = newArray2(sz1,sz2); + type**arr2 = *arr; + type *ptr = in; + size_t szarr2 = sz1*sz2; + for(size_t i=0;i +inline type ** newArray2(type * in, size_t sz1, size_t sz2) +{ + type**arr = newArray2(sz1); + type**arr2 = arr; + type *ptr = in; + size_t szarr2 = sz1; + for(size_t i=0;i inline void delArray1(type * arr) @@ -107,319 +191,504 @@ template inline void delArr3(type *** arr, size_t sz1, size_t sz2) template inline void delArr4(type **** arr, size_t sz1, size_t sz2, size_t sz3) { delArray3(arr); } - -// classes to dereference arrays. -// -// ArrayRefN is essentially a dumbed-down version of ArrN with -// an index shift applied to the underlying array. The purpose -// of ArrayRefN is to allow elements of multidimensional arrays -// to be accessed with a calculated one-dimensional index while -// using chained operator[] syntax (e.g. myarr[i][j]), i.e. the -// same syntax as is used for native or nested arrays. 
This -// implementation is likely to be slow unless optimization is -// turned on, allowing the compiler to figure out that the whole -// chain of calls to the operator[] methods and to the ArrayRefN -// constructors reduces to computing a one-dimensional subscript -// used to access a one-dimensional array. -// -template -class ArrayRef1 + +namespace iPic3D { - type* const __restrict__ arr; - const size_t S1; - const size_t shift; - public: - inline ArrayRef1(type*const arr_, size_t k, size_t s1) : - arr(arr_), shift(k), S1(s1) - {} - inline type& operator[](size_t n1){ - check_bounds(n1, S1); - ALIGNED(arr); - return arr[shift+n1]; - } -}; - -template -class ArrayRef2 -{ - type* const __restrict__ arr; - const size_t shift; - const size_t S2, S1; - public: - inline ArrayRef2(type*const arr_, size_t k, size_t s2, size_t s1) : - arr(arr_), shift(k), S2(s2), S1(s1) - {} - inline ArrayRef1 operator[](size_t n2){ - check_bounds(n2,S2); - return ArrayRef1(arr, (shift+n2)*S1, S1); - } -}; - -template -class ArrayRef3 -{ - type* const __restrict__ arr; - const size_t shift; - const size_t S3, S2, S1; - public: - inline ArrayRef3(type*const arr_, size_t k, size_t s3, size_t s2, size_t s1) : - arr(arr_), shift(k), S3(s3), S2(s2), S1(s1) - {} - inline ArrayRef2 operator[](size_t n3){ - check_bounds(n3, S3); - return ArrayRef2(arr, (shift+n3)*S2, S2, S1); - } -}; - -// ArrN can adopt an array allocated by newArrN -// -// The purpose of these classes is to provide more efficient -// and more regulated access to array elements. The idea is to -// maintain backward compatibility while allowing us to move -// toward a proper array abstraction. -// -// The user of ArrN is responsible for memory management. -// The ArrayN classes are the version of this class -// with automatic deallocation. 
-// -// Examples: -// -// Using constructor to create array: -// { -// Arr2 arr(16, 16); -// arr[1][2] = 5; -// arr.free(); -// } -// Using ArrN to adopt an array allocated by newArrN -// { -// int** array = newArray2(16,16) -// Arr2 arr(array,16,16); // adopt array -// arr[1][2] = 5; -// assert_eq(arr[1][2],array[1][2]); -// // arr.free(); // should not do both this and next line. -// delArray2(array); -// } -// -// proposed improvements: -// - allow shifting of the base: -// - need "double shift" in each class -// - need to implement "arr3.set_bases(b1,b2,b3);" -// which calculates "shift". -// - need "const size_t b1, b2, b3;" for beginning indices -// to allow bounds checking. Should not incur run-time -// penalty, but it so then condition on CHECK_BOUNDS. -// - methods that use parallel arithmetic for omp and vectorized code - -template -class Arr1 -{ - private: // data - const size_t S1; + // underlying 1-dimensional array class for arrays + + template + class base_arr + { + private: + size_t size; + protected: + type* const __restrict__ arr; + type* get_arr()const{return arr;} + public: + base_arr(size_t s) : size(s), arr(AlignedAlloc(type, s)) {} + base_arr(type* in, size_t s) : size(s), arr(in) {} + ~base_arr(){} + void free() { AlignedFree(arr); } + void setall(type val){ + for(size_t i=0;i + class ArrayGet1 + { type* const __restrict__ arr; - public: - ~Arr1() { } - void free() { AlignedFree(arr); } - Arr1(size_t s1) : - S1(s1), - arr(AlignedAlloc(type, s1)) - { } - Arr1(type* in, - size_t s1) : - S1(s1), - arr(in) - { } + const size_t S1; + const size_t shift; + public: + inline ArrayGet1(type*const arr_, size_t k, size_t s1) : + arr(arr_), shift(k), S1(s1) + {} inline type& operator[](size_t n1){ check_bounds(n1, S1); ALIGNED(arr); - return arr[n1]; - } - inline size_t getidx(size_t n1) const - { - check_bounds(n1, S1); - return n1; + return arr[shift+n1]; } - const type& get(size_t n1) const - { ALIGNED(arr); return arr[getidx(n1)]; } - type& 
fetch(size_t n2,size_t n1) const - { ALIGNED(arr); return arr[getidx(n1)]; } - void set(size_t n1, type value) - { ALIGNED(arr); arr[getidx(n1)] = value; } -}; - -template -class Arr2 -{ - private: // data - const size_t S2,S1; + }; + + template + class ArrayGet2 + { type* const __restrict__ arr; - public: - ~Arr2(){} - void free() { AlignedFree(arr); } - Arr2(size_t s2, size_t s1) : - S2(s2), S1(s1), - arr(AlignedAlloc(type, s2*s1)) - { - } - Arr2(type*const* in, - size_t s2, size_t s1) : - S2(s2), S1(s1), - arr(*in) - { } - // for backwards compatibility support bracket notation - inline ArrayRef1 operator[](size_t n2){ - check_bounds(n2, S2); - return ArrayRef1(arr, n2*S1, S1); + const size_t shift; + const size_t S2, S1; + public: + inline ArrayGet2(type*const arr_, size_t k, size_t s2, size_t s1) : + arr(arr_), shift(k), S2(s2), S1(s1) + {} + inline ArrayGet1 operator[](size_t n2){ + check_bounds(n2,S2); + return ArrayGet1(arr, (shift+n2)*S1, S1); } - inline size_t getidx(size_t n2, size_t n1) const - { - check_bounds(n2, S2); - check_bounds(n1, S1); - return n2*S1+n1; - } - // I prefer "fetch" over operator() to hilight read/write access - //type& operator()(size_t n2, size_t n1) const - // { ALIGNED(arr); return arr[n1+S1*n2]; } - type& fetch(size_t n2,size_t n1) const - { ALIGNED(arr); return arr[getidx(n2,n1)]; } - // better to use accessors that distinguish read from write: - const type& get(size_t n2,size_t n1) const - { ALIGNED(arr); return arr[getidx(n2,n1)]; } - void set(size_t n2,size_t n1, type value) - { ALIGNED(arr); arr[getidx(n2,n1)] = value; } - inline Arr1fetch_Arr1(){ return Arr1(arr, S1*S2); } -}; - -template -class Arr3 -{ - private: // data - const size_t S3,S2,S1; + }; + + template + class ArrayGet3 + { type* const __restrict__ arr; - public: - size_t dim1()const{return S1;} - size_t dim2()const{return S2;} - size_t dim3()const{return S3;} - ~Arr3(){} - void free() { AlignedFree(arr); } - Arr3(size_t s3, size_t s2, size_t s1) : - S3(s3), 
S2(s2), S1(s1), - arr(AlignedAlloc(type, s3*s2*s1)) - { } - Arr3(type*const*const* in, - size_t s3, size_t s2, size_t s1) : - S3(s3), S2(s2), S1(s1), - arr(**in) - { } - inline ArrayRef2 operator[](size_t n3){ + const size_t shift; + const size_t S3, S2, S1; + public: + inline ArrayGet3(type*const arr_, size_t k, size_t s3, size_t s2, size_t s1) : + arr(arr_), shift(k), S3(s3), S2(s2), S1(s1) + {} + inline ArrayGet2 operator[](size_t n3){ check_bounds(n3, S3); - return ArrayRef2(arr, n3*S2, S2, S1); + return ArrayGet2(arr, (shift+n3)*S2, S2, S1); } - inline size_t getidx(size_t n3, size_t n2, size_t n1) const - { - check_bounds(n3, S3); - check_bounds(n2, S2); + }; + + // const versions + + template + class const_array_get1 + { + type* const __restrict__ arr; + const size_t S1; + const size_t shift; + public: + inline const_array_get1(type*const arr_, size_t k, size_t s1) : + arr(arr_), shift(k), S1(s1) + {} + inline const type& operator[](size_t n1)const{ check_bounds(n1, S1); - return (n3*S2+n2)*S1+n1; + ALIGNED(arr); + return arr[shift+n1]; } - //type& operator()(size_t n3, size_t n2, size_t n1) const - //{ ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } - type& fetch(size_t n3,size_t n2,size_t n1) const - { ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } - const type& get(size_t n3,size_t n2,size_t n1) const - { ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } - void set(size_t n3,size_t n2,size_t n1, type value) - { ALIGNED(arr); arr[getidx(n3,n2,n1)] = value; } - inline Arr1fetch_Arr1(){ return Arr1(arr, S1*S2*S3); } -}; - -template -class Arr4 -{ - private: // data - const size_t S4,S3,S2,S1; + }; + + template + class const_array_get2 + { type* const __restrict__ arr; - public: - ~Arr4(){} // nonempty destructor would kill performance - void free() { AlignedFree(arr); } - Arr4(size_t s4, size_t s3, size_t s2, size_t s1) : - arr(AlignedAlloc(type, s4*s3*s2*s1)), - S4(s4), S3(s3), S2(s2), S1(s1) - { } - Arr4(type*const*const*const* in, - size_t s4, size_t s3, size_t 
s2, size_t s1) : - S4(s4), S3(s3), S2(s2), S1(s1), - arr(***in) - { } - inline ArrayRef3 operator[](size_t n4){ - check_bounds(n4, S4); - return ArrayRef3(arr, n4*S3, S3, S2, S1); + const size_t shift; + const size_t S2, S1; + public: + inline const_array_get2(type*const arr_, size_t k, size_t s2, size_t s1) : + arr(arr_), shift(k), S2(s2), S1(s1) + {} + inline const const_array_get1 operator[](size_t n2)const{ + check_bounds(n2,S2); + return const_array_get1(arr, (shift+n2)*S1, S1); } - inline size_t getidx(size_t n4, size_t n3, size_t n2, size_t n1) const - { - check_bounds(n4, S4); + }; + + template + class const_array_get3 + { + type* const __restrict__ arr; + const size_t shift; + const size_t S3, S2, S1; + public: + const_array_get3(type*const arr_, size_t k, size_t s3, size_t s2, size_t s1) : + arr(arr_), shift(k), S3(s3), S2(s2), S1(s1) + {} + inline const const_array_get2 operator[](size_t n3)const{ check_bounds(n3, S3); - check_bounds(n2, S2); - check_bounds(n1, S1); - return ((n4*S3+n3)*S2+n2)*S1+n1; + return const_array_get2(arr, (shift+n3)*S2, S2, S1); } - const type& get(size_t n4,size_t n3,size_t n2,size_t n1) const - { ALIGNED(arr); return arr[getidx(n4,n3,n2,n1)]; } - type& fetch(size_t n4,size_t n3,size_t n2,size_t n1) const - { ALIGNED(arr); return arr[getidx(n4,n3,n2,n1)]; } - void set(size_t n4,size_t n3,size_t n2,size_t n1, type value) - { ALIGNED(arr); arr[getidx(n4,n3,n2,n1)] = value; } -}; - -// Versions of array classes which automatically free memory. 
-// -// Note that the nonempty destructor kills performance -// unless compiling with -fno-exceptions - -template -struct Array1 : public Arr1 -{ - ~Array1(){Arr1::free();} - Array1(size_t s1) : Arr1(s1) { } -}; - -template -struct Array2 : public Arr2 -{ - ~Array2(){Arr2::free();} - Array2(size_t s2, size_t s1) : Arr2(s2,s1) { } -}; - -template -struct Array3 : public Arr3 -{ - ~Array3(){Arr3::free();} - Array3(size_t s3, size_t s2, size_t s1) : Arr3(s3,s2,s1) { } -}; + }; + + // ArrN corresponds to multi_array_ref in the boost library. + // + // ArrN can adopt an array allocated by newArrN + // + // The purpose of these classes is to provide more efficient + // and more regulated access to array elements. The idea is to + // maintain backward compatibility while allowing us to move + // toward a proper array abstraction. + // + // The user of ArrN is responsible for memory management. + // The ArrayN classes are the version of this class + // with automatic deallocation. + // + // Examples: + // + // Using constructor to create array: + // { + // array_ref2 arr(16, 16); + // arr[1][2] = 5; + // arr.free(); + // } + // Using ArrN to adopt an array allocated by newArrN + // { + // int** array = newArray2(16,16) + // array_ref2 arr(array,16,16); // adopt array + // arr[1][2] = 5; + // assert_eq(arr[1][2],array[1][2]); + // // arr.free(); // should not do both this and next line. + // delArray2(array); + // } + // + // proposed improvements: + // - allow shifting of the base: + // - need "double shift" in each class + // - need to implement "arr3.set_bases(b1,b2,b3);" + // which calculates "shift". + // - need "const size_t b1, b2, b3;" for beginning indices + // to allow bounds checking. Should not incur run-time + // penalty, but it so then condition on CHECK_BOUNDS. 
+ // - methods that use parallel arithmetic for omp and vectorized code + + template + class array_ref1 + { + private: // data + const size_t S1; + type* const __restrict__ arr; + public: + ~array_ref1() { } + void free() { AlignedFree(arr); } + array_ref1(size_t s1) : + S1(s1), + arr(AlignedAlloc(type, s1)) + { } + array_ref1(type* in, + size_t s1) : + S1(s1), + arr(in) + { } + inline type& operator[](size_t n1){ + check_bounds(n1, S1); + ALIGNED(arr); + return arr[n1]; + } + inline size_t getidx(size_t n1) const + { + check_bounds(n1, S1); + return n1; + } + const type& get(size_t n1) const + { ALIGNED(arr); return arr[getidx(n1)]; } + type& fetch(size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n1)]; } + void set(size_t n1, type value) + { ALIGNED(arr); arr[getidx(n1)] = value; } + }; + + template + class array_ref2 + { + private: // data + const size_t S2,S1; + type* const __restrict__ arr; + public: + ~array_ref2(){} + void free() { AlignedFree(arr); } + array_ref2(size_t s2, size_t s1) : + S2(s2), S1(s1), + arr(AlignedAlloc(type, s2*s1)) + { + } + array_ref2(type*const* in, + size_t s2, size_t s1) : + S2(s2), S1(s1), + arr(*in) + { } + // dereference via calculated index + inline ArrayGet1 operator[](size_t n2){ + check_bounds(n2, S2); + return ArrayGet1(arr, n2*S1, S1); + } + inline size_t getidx(size_t n2, size_t n1) const + { + check_bounds(n2, S2); + check_bounds(n1, S1); + return n2*S1+n1; + } + type& fetch(size_t n2, size_t n1) const + { ALIGNED(arr); return arr[n1+S1*n2]; } + // better to use accessors that distinguish read from write: + const type& get(size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n2,n1)]; } + void set(size_t n2,size_t n1, type value) + { ALIGNED(arr); arr[getidx(n2,n1)] = value; } + //inline array_ref1fetch_Arr1(){ return array_ref1(arr, S1*S2); } + }; + + template + class const_array_ref3 // : public base_arr + { + //using base_arr::get_arr; + //using base_arr::arr; + protected: // data + size_t size; 
+ const size_t S3,S2,S1; + type* const __restrict__ arr; + type*const*const*const arr3; + public: + ~const_array_ref3(){} + const_array_ref3(size_t s3, size_t s2, size_t s1) : + size(s3*s2*s1), arr(AlignedAlloc(type, size)), + //base_arr(s3*s2*s1), + S3(s3), S2(s2), S1(s1), + arr3(newArray3(arr,s3,s2,s1)) + { } + const_array_ref3(type*const*const* in, + size_t s3, size_t s2, size_t s1) : + size(s3*s2*s1), arr(**in), + //base_arr(**in, s3*s2*s1), + S3(s3), S2(s2), S1(s1), + arr3(in) + { } + #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) + const const_array_get2 operator[](size_t n3)const{ + check_bounds(n3, S3); + return const_array_get2(arr, n3*S2, S2, S1); + } + #else + // this causes operator[] to dereference via chained pointer + operator type***(){ return (type***) arr3; } + #endif + void check_idx_bounds(size_t n3, size_t n2, size_t n1) const + { + check_bounds(n3, S3); + check_bounds(n2, S2); + check_bounds(n1, S1); + } + inline size_t getidx(size_t n3, size_t n2, size_t n1) const + { check_idx_bounds(n3,n2,n1); return (n3*S2+n2)*S1+n1; } + #ifdef CHAINED_ARRAYS + const type& get(size_t n3,size_t n2,size_t n1) const + { check_idx_bounds(n3,n2,n1); return arr3[n3][n2][n1]; } + protected: // hack: not in const_array_ref3 due to icpc compile error + type& fetch(size_t n3,size_t n2,size_t n1) const + { check_idx_bounds(n3,n2,n1); return arr3[n3][n2][n1]; } + void set(size_t n3,size_t n2,size_t n1, type value) + { check_idx_bounds(n3,n2,n1); arr3[n3][n2][n1] = value; } + #else + const type& get(size_t n3,size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } + protected: // hack: not in const_array_ref3 due to icpc compile error + type& fetch(size_t n3,size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n3,n2,n1)]; } + void set(size_t n3,size_t n2,size_t n1, type value) + { ALIGNED(arr); arr[getidx(n3,n2,n1)] = value; } + #endif + }; + + template + class array_ref3 : public const_array_ref3 + { + //using base_arr::arr; + //using 
base_arr::get_arr; + using const_array_ref3::size; + using const_array_ref3::arr; + using const_array_ref3::S3; + using const_array_ref3::S2; + using const_array_ref3::S1; + using const_array_ref3::arr3; + using const_array_ref3::getidx; + public: + ~array_ref3(){} + array_ref3(size_t s3, size_t s2, size_t s1) : + const_array_ref3(s3,s2,s1) + { } + array_ref3(type*const*const* in, + size_t s3, size_t s2, size_t s1) : + const_array_ref3(in,s3,s2,s1) + { } + void free(){ delArray3((type***)arr3); } + #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) + inline ArrayGet2 operator[](size_t n3){ + check_bounds(n3, S3); + return ArrayGet2(arr, n3*S2, S2, S1); + } + #else + // this causes operator[] to dereference via chained pointer + operator type***(){ return (type***) arr3; } + #endif + type& fetch(size_t n3,size_t n2,size_t n1) const + { return const_array_ref3::fetch(n3,n2,n1); } + void set(size_t n3,size_t n2,size_t n1, type value) + { const_array_ref3::set(n3,n2,n1, value); } + void setall(type val){ + for(size_t i=0;i causes problems in g++ 4.0 (2005). 
+ template + class const_array_ref4 //: public base_arr + { + //using base_arr::get_arr; + protected: // data + size_t size; + const size_t S4,S3,S2,S1; + type* const __restrict__ arr; + type*const*const*const*const arr4; + public: + ~const_array_ref4(){} + const_array_ref4(size_t s4, size_t s3, size_t s2, size_t s1) : + size(s4*s3*s2*s1), arr(AlignedAlloc(type, size)), + //base_arr(s4*s3*s2*s1), + S4(s4), S3(s3), S2(s2), S1(s1), + arr4(newArray4(arr,s4,s3,s2,s1)) + { } + const_array_ref4(type*const*const*const* in, + size_t s4, size_t s3, size_t s2, size_t s1) : + size(s4*s3*s2*s1), arr(***in), + //base_arr(***in, s4*s3*s2*s1), + S4(s4), S3(s3), S2(s2), S1(s1), + arr4(in) + { } + #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) + const const_array_get3 operator[](size_t n4)const{ + check_bounds(n4, S4); + return const_array_get3(arr, n4*S3, S3, S2, S1); + } + #else + // this causes operator[] to dereference via chained pointer + operator type****(){ return (type****) arr4; } + #endif + void check_idx_bounds(size_t n4, size_t n3, size_t n2, size_t n1) const + { + check_bounds(n4, S4); + check_bounds(n3, S3); + check_bounds(n2, S2); + check_bounds(n1, S1); + } + inline size_t getidx(size_t n4, size_t n3, size_t n2, size_t n1) const + { check_idx_bounds(n4,n3,n2,n1); return ((n4*S3+n3)*S2+n2)*S1+n1; } + #ifdef CHAINED_ARRAYS + const type& get(size_t n4,size_t n3,size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n4,n3,n2,n1)]; } + protected: // hack: not in const_array_ref4 due to icpc compile error + type& fetch(size_t n4,size_t n3,size_t n2,size_t n1) const + { ALIGNED(arr); return arr[getidx(n4,n3,n2,n1)]; } + void set(size_t n4,size_t n3,size_t n2,size_t n1, type value) + { ALIGNED(arr); arr[getidx(n4,n3,n2,n1)] = value; } + #else + const type& get(size_t n4,size_t n3,size_t n2,size_t n1) const + { check_idx_bounds(n4,n3,n2,n1); return arr4[n4][n3][n2][n1]; } + protected: // hack: not in const_array_ref4 due to icpc compile error + type& fetch(size_t 
n4,size_t n3,size_t n2,size_t n1) const + { check_idx_bounds(n4,n3,n2,n1); return arr4[n4][n3][n2][n1]; } + void set(size_t n4,size_t n3,size_t n2,size_t n1, type value) + { check_idx_bounds(n4,n3,n2,n1); arr4[n4][n3][n2][n1] = value; } + #endif + }; + + template + class array_ref4 : public const_array_ref4 + { + //using base_arr::get_arr; + using const_array_ref4::arr; + using const_array_ref4::S4; + using const_array_ref4::S3; + using const_array_ref4::S2; + using const_array_ref4::S1; + using const_array_ref4::arr4; + using const_array_ref4::getidx; + public: + ~array_ref4(){} + array_ref4(size_t s4, size_t s3, size_t s2, size_t s1) : + const_array_ref4(s4,s3,s2,s1) + { } + array_ref4(type*const*const*const* in, + size_t s4, size_t s3, size_t s2, size_t s1) : + const_array_ref4(in,s4,s3,s2,s1) + { } + #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) + inline ArrayGet3 operator[](size_t n4){ + check_bounds(n4, S4); + return ArrayGet3(arr, n4*S3, S3, S2, S1); + } + #else + operator type****(){ return (type****) arr4; } + #endif + type& fetch(size_t n4,size_t n3,size_t n2,size_t n1) const + { return const_array_ref4::fetch(n4,n3,n2,n1); } + void set(size_t n4,size_t n3,size_t n2,size_t n1, type value) + { const_array_ref4::set(n4,n3,n2,n1, value); } + void free(){ delArray4((type****)arr4); } + type**** fetch_arr4(){ return (type****) arr4; } + //bool verify_dims(size_t s4, size_t s3, size_t s2, size_t s1){ + // if(s4==S4 && s3==S3 && s2==S2 && s1==S1) return true; + // Wprintf("%d==%d && %d==%d && %d==%d && %d==%d failed", + // s4, S4, s3, S3, s2, S2, s1, S1); + // return false; + //} + }; + + // Versions of array classes which automatically free memory + // (corresponding to multi_array in the boost library). 
+ // + // Note that the nonempty destructor kills performance + // unless compiling with -fno-exceptions + + template + struct array1 : public array_ref1 + { + ~array1(){array_ref1::free();} + array1(size_t s1) : array_ref1(s1) { } + }; + + template + struct array2 : public array_ref2 + { + ~array2(){array_ref2::free();} + array2(size_t s2, size_t s1) : array_ref2(s2,s1) { } + }; + + template + struct array3 : public array_ref3 + { + ~array3(){array_ref3::free();} + array3(size_t s3, size_t s2, size_t s1) : array_ref3(s3,s2,s1) { } + }; + + template + struct array4 : public array_ref4 + { + ~array4(){array_ref4::free();} + array4(size_t s4, size_t s3, size_t s2, size_t s1) + : array_ref4(s4,s3,s2,s1) { } + }; -template -struct Array4 : public Arr4 -{ - ~Array4(){Arr4::free();} - Array4(size_t s4, size_t s3, size_t s2, size_t s1) - : Arr4(s4,s3,s2,s1) { } -}; +} -// These aliases are defined for the following flexibilization purposes: -// - to avoid filling the code with template brackets -// (i.e., to minimize explicitly template-dependent code). -// - so that they can be redefined according to the user's -// preferred array implementation. 
-// -typedef Arr1 intArr1; -typedef Arr2 intArr2; -typedef Arr3 intArr3; -typedef Arr4 intArr4; -typedef Arr1 doubleArr1; -typedef Arr2 doubleArr2; -typedef Arr3 doubleArr3; -typedef Arr4 doubleArr4; -typedef ArrayRef1 doubleArrRef1; -// #define newArr4(type,sz1,sz2,sz3,sz4) newArray4((sz1),(sz2),(sz3),(sz4)) #define newArr3(type,sz1,sz2,sz3) newArray3((sz1),(sz2),(sz3)) #define newArr2(type,sz1,sz2) newArray2((sz1),(sz2)) diff --git a/include/Basic.h b/include/Basic.h index 323af124..f444b216 100644 --- a/include/Basic.h +++ b/include/Basic.h @@ -11,6 +11,7 @@ developers: Stefano Markidis, Giovanni Lapenta #include "MPIdata.h" #include "EllipticF.h" +#include "Alloc.h" using std::cout; using std::endl; @@ -63,11 +64,11 @@ inline double norm2(double **vect, int nx, int ny) { return (result); } /** method to calculate the square norm of a vector */ -inline double norm2(double ***vect, int nx, int ny) { +inline double norm2(const arr3_double& vect, int nx, int ny) { double result = 0; for (int i = 0; i < nx; i++) for (int j = 0; j < ny; j++) - result += vect[i][j][0] * vect[i][j][0]; + result += vect.get(i,j,0) * vect.get(i,j,0); return (result); } /** method to calculate the square norm of a vector */ @@ -81,13 +82,13 @@ inline double norm2(double *vect, int nx) { /** method to calculate the parallel dot product */ -inline double norm2P(double ***vect, int nx, int ny, int nz) { +inline double norm2P(const arr3_double& vect, int nx, int ny, int nz) { double result = 0; double local_result = 0; for (int i = 0; i < nx; i++) for (int j = 0; j < ny; j++) for (int k = 0; k < nz; k++) - local_result += vect[i][j][k] * vect[i][j][k]; + local_result += vect.get(i,j,k) * vect.get(i,j,k); MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); return (result); @@ -127,60 +128,56 @@ inline void sum(double *vect1, double *vect2, int n) { } /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(double ***vect1, double 
***vect2, int nx, int ny, int nz) { +inline void sum(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] += vect2[i][j][k]; + vect1.fetch(i,j,k) += vect2.get(i,j,k); } /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(double ***vect1, double ***vect2, int nx, int ny) { +inline void sum(arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect1[i][j][0] += vect2[i][j][0]; + vect1.fetch(i,j,0) += vect2.get(i,j,0); } /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(double ***vect1, double ****vect2, int nx, int ny, int nz, int ns) { +inline void sum(arr3_double& vect1, const arr4_double& vect2, int nx, int ny, int nz, int ns) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] += vect2[ns][i][j][k]; + vect1.fetch(i,j,k) += vect2.get(ns,i,j,k); } /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(double ***vect1, double ****vect2, int nx, int ny, int ns) { +inline void sum(arr3_double& vect1, const arr4_double& vect2, int nx, int ny, int ns) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect1[i][j][0] += vect2[ns][i][j][0]; + vect1.fetch(i,j,0) += vect2.get(ns,i,j,0); } /** method to calculate the subtraction of two vectors vector1 = vector1 - vector2*/ -inline void sub(double ***vect1, double ***vect2, int nx, int ny, int nz) { +inline void sub(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] -= vect2[i][j][k]; - - + vect1.fetch(i,j,k) -= 
vect2.get(i,j,k); } /** method to calculate the subtraction of two vectors vector1 = vector1 - vector2*/ -inline void sub(double ***vect1, double ***vect2, int nx, int ny) { +inline void sub(arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect1[i][j][0] -= vect2[i][j][0]; - - + vect1.fetch(i,j,0) -= vect2.get(i,j,0); } /** method to sum 4 vectors vector1 = alfa*vector1 + beta*vector2 + gamma*vector3 + delta*vector4 */ -inline void sum4(double ***vect1, double alfa, double ***vect2, double beta, double ***vect3, double gamma, double ***vect4, double delta, double ***vect5, int nx, int ny, int nz) { +inline void sum4(arr3_double& vect1, double alfa, const arr3_double& vect2, double beta, const arr3_double& vect3, double gamma, const arr3_double& vect4, double delta, const arr3_double& vect5, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = alfa * (vect2[i][j][k] + beta * vect3[i][j][k] + gamma * vect4[i][j][k] + delta * vect5[i][j][k]); + vect1.fetch(i,j,k) = alfa * (vect2.get(i,j,k) + beta * vect3.get(i,j,k) + gamma * vect4.get(i,j,k) + delta * vect5.get(i,j,k)); } /** method to calculate the scalar-vector product */ @@ -190,19 +187,19 @@ inline void scale(double *vect, double alfa, int n) { } /** method to calculate the scalar-vector product */ -inline void scale(double ***vect, double alfa, int nx, int ny) { +inline void scale(arr3_double& vect, double alfa, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect[i][j][0] *= alfa; + vect.fetch(i,j,0) *= alfa; } /** method to calculate the scalar-vector product */ -inline void scale(double ***vect, double alfa, int nx, int ny, int nz) { +inline void scale(arr3_double& vect, double alfa, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 
0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect[i][j][k] *= alfa; + vect.fetch(i,j,k) *= alfa; } /** method to calculate the scalar product */ inline void scale(double vect[][2][2], double alfa, int nx, int ny, int nz) { @@ -212,18 +209,18 @@ inline void scale(double vect[][2][2], double alfa, int nx, int ny, int nz) { vect[i][j][k] *= alfa; } /** method to calculate the scalar-vector product */ -inline void scale(double ***vect1, double ***vect2, double alfa, int nx, int ny, int nz) { +inline void scale(arr3_double& vect1, const arr3_double& vect2, double alfa, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = vect2[i][j][k] * alfa; + vect1.fetch(i,j,k) = vect2.get(i,j,k) * alfa; } /** method to calculate the scalar-vector product */ -inline void scale(double ***vect1, double ***vect2, double alfa, int nx, int ny) { +inline void scale(arr3_double& vect1, const arr3_double& vect2, double alfa, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect1[i][j][0] = vect2[i][j][0] * alfa; + vect1.fetch(i,j,0) = vect2.get(i,j,0) * alfa; } /** method to calculate the scalar-vector product */ @@ -233,11 +230,11 @@ inline void scale(double *vect1, double *vect2, double alfa, int n) { } /** method to calculate vector1 = vector1 + alfa*vector2 */ -inline void addscale(double alfa, double ***vect1, double ***vect2, int nx, int ny, int nz) { +inline void addscale(double alfa, arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = vect1[i][j][k] + alfa * vect2[i][j][k]; + vect1.fetch(i,j,k) = vect1.get(i,j,k) + alfa * vect2.get(i,j,k); } /** add scale for weights */ inline void addscale(double alfa, double vect1[][2][2], double vect2[][2][2], int nx, int ny, int nz) { 
@@ -248,10 +245,10 @@ inline void addscale(double alfa, double vect1[][2][2], double vect2[][2][2], in } /** method to calculate vector1 = vector1 + alfa*vector2 */ -inline void addscale(double alfa, double ***vect1, double ***vect2, int nx, int ny) { +inline void addscale(double alfa, arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect1[i][j][0] += alfa * vect2[i][j][0]; + vect1.fetch(i,j,0) += alfa * vect2.get(i,j,0); } /** method to calculate vector1 = vector1 + alfa*vector2 */ inline void addscale(double alfa, double *vect1, double *vect2, int n) { @@ -266,90 +263,88 @@ inline void addscale(double alfa, double beta, double *vect1, double *vect2, int } /** method to calculate vector1 = beta*vector1 + alfa*vector2 */ -inline void addscale(double alfa, double beta, double ***vect1, double ***vect2, int nx, int ny, int nz) { +inline void addscale(double alfa, double beta, arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) { - vect1[i][j][k] = beta * vect1[i][j][k] + alfa * vect2[i][j][k]; + vect1.fetch(i,j,k) = beta * vect1.get(i,j,k) + alfa * vect2.get(i,j,k); } } /** method to calculate vector1 = beta*vector1 + alfa*vector2 */ -inline void addscale(double alfa, double beta, double ***vect1, double ***vect2, int nx, int ny) { +inline void addscale(double alfa, double beta, arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect1[i][j][0] = beta * vect1[i][j][0] + alfa * vect2[i][j][0]; + vect1.fetch(i,j,0) = beta * vect1.get(i,j,0) + alfa * vect2.get(i,j,0); } /** method to calculate vector1 = alfa*vector2 + beta*vector3 */ -inline void scaleandsum(double ***vect1, double alfa, double beta, double ***vect2, double ***vect3, int nx, int ny, int nz) { 
+inline void scaleandsum(arr3_double& vect1, double alfa, double beta, const arr3_double& vect2, const arr3_double& vect3, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = alfa * vect2[i][j][k] + beta * vect3[i][j][k]; + vect1.fetch(i,j,k) = alfa * vect2.get(i,j,k) + beta * vect3.get(i,j,k); } /** method to calculate vector1 = alfa*vector2 + beta*vector3 with vector2 depending on species*/ -inline void scaleandsum(double ***vect1, double alfa, double beta, double ****vect2, double ***vect3, int ns, int nx, int ny, int nz) { +inline void scaleandsum(arr3_double& vect1, double alfa, double beta, const arr4_double& vect2, const arr3_double& vect3, int ns, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = alfa * vect2[ns][i][j][k] + beta * vect3[i][j][k]; + vect1.fetch(i,j,k) = alfa * vect2.get(ns,i,j,k) + beta * vect3.get(i,j,k); } /** method to calculate vector1 = alfa*vector2*vector3 with vector2 depending on species*/ -inline void prod(double ***vect1, double alfa, double ****vect2, int ns, double ***vect3, int nx, int ny, int nz) { +inline void prod(arr3_double& vect1, double alfa, const arr4_double& vect2, int ns, const arr3_double& vect3, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = alfa * vect2[ns][i][j][k] * vect3[i][j][k]; + vect1.fetch(i,j,k) = alfa * vect2.get(ns,i,j,k) * vect3.get(i,j,k); } /** method to calculate vect1 = vect2/alfa */ -inline void div(double ***vect1, double alfa, double ***vect2, int nx, int ny, int nz) { +inline void div(arr3_double& vect1, double alfa, const arr3_double& vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < 
nz; k++) - vect1[i][j][k] = vect2[i][j][k] / alfa; + vect1.fetch(i,j,k) = vect2.get(i,j,k) / alfa; } -inline void prod6(double ***vect1, double ***vect2, double ***vect3, double ***vect4, double ***vect5, double ***vect6, double ***vect7, int nx, int ny, int nz) { +inline void prod6(arr3_double& vect1, const arr3_double& vect2, const arr3_double& vect3, const arr3_double& vect4, const arr3_double& vect5, const arr3_double& vect6, const arr3_double& vect7, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = vect2[i][j][k] * vect3[i][j][k] + vect4[i][j][k] * vect5[i][j][k] + vect6[i][j][k] * vect7[i][j][k]; + vect1.fetch(i,j,k) = vect2.get(i,j,k) * vect3.get(i,j,k) + vect4.get(i,j,k) * vect5.get(i,j,k) + vect6.get(i,j,k) * vect7.get(i,j,k); } /** method used for calculating PI */ -inline void proddiv(double ***vect1, double ***vect2, double alfa, double ***vect3, double ***vect4, double ***vect5, double ***vect6, double beta, double ***vect7, double ***vect8, double gamma, double ***vect9, int nx, int ny, int nz) { +inline void proddiv(arr3_double& vect1, const arr3_double& vect2, double alfa, const arr3_double& vect3, const arr3_double& vect4, const arr3_double& vect5, const arr3_double& vect6, double beta, const arr3_double& vect7, const arr3_double& vect8, double gamma, const arr3_double& vect9, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = (vect2[i][j][k] + alfa * (vect3[i][j][k] * vect4[i][j][k] - vect5[i][j][k] * vect6[i][j][k]) + beta * vect7[i][j][k] * vect8[i][j][k]) / (1 + gamma * vect9[i][j][k]); + vect1.fetch(i,j,k) = (vect2.get(i,j,k) + alfa * (vect3.get(i,j,k) * vect4.get(i,j,k) - vect5.get(i,j,k) * vect6.get(i,j,k)) + beta * vect7.get(i,j,k) * vect8.get(i,j,k)) / (1 + gamma * vect9.get(i,j,k)); // questo mi convince veramente 
poco!!!!!!!!!!!!!! CAZZO!!!!!!!!!!!!!!!!!! // ***vect1++ = (***vect2++ + alfa*((***vect3++)*(***vect4++) - (***vect5++)*(***vect6++)) + beta*(***vect7++)*(***vect8++))/(1+gamma*(***vect9++)); } /** method to calculate the opposite of a vector */ -inline void neg(double ***vect, int nx, int ny, int nz) { +inline void neg(arr3_double& vect, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect[i][j][k] = -vect[i][j][k]; - - + vect.fetch(i,j,k) = -vect.get(i,j,k); } /** method to calculate the opposite of a vector */ -inline void neg(double ***vect, int nx, int ny) { +inline void neg(arr3_double& vect, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect[i][j][0] = -vect[i][j][0]; + vect.fetch(i,j,0) = -vect.get(i,j,0); } /** method to calculate the opposite of a vector */ -inline void neg(double ***vect, int nx) { +inline void neg(arr3_double& vect, int nx) { for (register int i = 0; i < nx; i++) - vect[i][0][0] = -vect[i][0][0]; + vect.fetch(i,0,0) = -vect.get(i,0,0); } /** method to calculate the opposite of a vector */ inline void neg(double *vect, int n) { @@ -359,34 +354,34 @@ inline void neg(double *vect, int n) { } /** method to set equal two vectors */ -inline void eq(double ***vect1, double ***vect2, int nx, int ny, int nz) { +inline void eq(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[i][j][k] = vect2[i][j][k]; + vect1.fetch(i,j,k) = vect2.get(i,j,k); } /** method to set equal two vectors */ -inline void eq(double ***vect1, double ***vect2, int nx, int ny) { +inline void eq(arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect1[i][j][0] = vect2[i][j][0]; + vect1.fetch(i,j,0) = 
vect2.get(i,j,0); } /** method to set equal two vectors */ -inline void eq(double ****vect1, double ***vect2, int nx, int ny, int is) { +inline void eq(arr4_double& vect1, const arr3_double& vect2, int nx, int ny, int is) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect1[is][i][j][0] = vect2[i][j][0]; + vect1.fetch(is,i,j,0) = vect2.get(i,j,0); } /** method to set equal two vectors */ -inline void eq(double ****vect1, double ***vect2, int nx, int ny, int nz, int is) { +inline void eq(arr4_double& vect1, const arr3_double& vect2, int nx, int ny, int nz, int is) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect1[is][i][j][k] = vect2[i][j][k]; + vect1.fetch(is,i,j,k) = vect2.get(i,j,k); } @@ -395,11 +390,11 @@ inline void eq(double *vect1, double *vect2, int n) { vect1[i] = vect2[i]; } /** method to set a vector to a Value */ -inline void eqValue(double value, double ***vect, int nx, int ny, int nz) { +inline void eqValue(double value, arr3_double& vect, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) - vect[i][j][k] = value; + vect.fetch(i,j,k) = value; } inline void eqValue(double value, double vect[][2][2], int nx, int ny, int nz) { @@ -410,16 +405,16 @@ inline void eqValue(double value, double vect[][2][2], int nx, int ny, int nz) { } /** method to set a vector to a Value */ -inline void eqValue(double value, double ***vect, int nx, int ny) { +inline void eqValue(double value, arr3_double& vect, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) - vect[i][j][0] = value; + vect.fetch(i,j,0) = value; } /** method to set a vector to a Value */ -inline void eqValue(double value, double ***vect, int nx) { +inline void eqValue(double value, arr3_double& vect, int nx) { for (register int i = 0; i < nx; i++) - vect[i][0][0] = value; 
+ vect.fetch(i,0,0) = value; } /** method to set a vector to a Value */ diff --git a/include/ComNodes3D.h b/include/ComNodes3D.h index 8e1636e5..7360a02e 100644 --- a/include/ComNodes3D.h +++ b/include/ComNodes3D.h @@ -10,6 +10,7 @@ developers : Stefano Markidis, Giovanni Lapenta #ifndef ComNodes3D_H #define ComNodes_H +#include "arraysfwd.h" #include "ComBasic3D.h" //#include "TimeTasks.h" @@ -19,45 +20,45 @@ developers : Stefano Markidis, Giovanni Lapenta #include "BcFields3D.h" /** communicate ghost cells (FOR NODES) */ -void communicateNode(int nx, int ny, int nz, double ***vector, VirtualTopology3D * vct); +void communicateNode(int nx, int ny, int nz, arr3_double& vector, VirtualTopology3D * vct); /** communicate ghost cells (FOR NODES) */ -void communicateNodeBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateNodeBC(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); /** communicate ghost cells (FOR NODES) with particles BC*/ -void communicateNodeBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateNodeBC_P(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); /** SPECIES: communicate ghost cells */ -void communicateNode(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D * vct); +void communicateNode(int nx, int ny, int nz, arr4_double& vector, int ns, VirtualTopology3D * vct); // PARTICLES /** SPECIES: communicate ghost cells */ -void communicateNode_P(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D 
* vct); +void communicateNode_P(int nx, int ny, int nz, arr4_double& vector, int ns, VirtualTopology3D * vct); // /** communicate ghost cells (FOR CENTERS) */ -void communicateCenter(int nx, int ny, int nz, double ***vector, VirtualTopology3D * vct); +void communicateCenter(int nx, int ny, int nz, arr3_double& vector, VirtualTopology3D * vct); /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ -void communicateCenterBoxStencilBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); // particles /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ -void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); // -void communicateNodeBoxStencilBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); -void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateNodeBoxStencilBC_P(int nx, 
int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); /** SPECIES: communicate ghost cells */ -void communicateCenter(int nx, int ny, int nz, double ****vector, int ns, VirtualTopology3D * vct); +void communicateCenter(int nx, int ny, int nz, arr4_double& vector, int ns, VirtualTopology3D * vct); // /////////// communication + BC //////////////////////////// -void communicateCenterBC(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateCenterBC(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); // /////////// communication + BC //////////////////////////// -void communicateCenterBC_P(int nx, int ny, int nz, double ***vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateCenterBC_P(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); #endif diff --git a/include/EMfields3D.h b/include/EMfields3D.h index 3d09049c..9a086d1f 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -91,9 +91,11 @@ class EMfields3D // :public Field void fixBforcefree(Grid * grid, VirtualTopology3D * vct); /*! 
Calculate the three components of Pi(implicit pressure) cross image vector */ - void PIdot(double ***PIdotX, double ***PIdotY, double ***PIdotZ, double ***vectX, double ***vectY, double ***vectZ, int ns, Grid * grid); + void PIdot(arr3_double& PIdotX, arr3_double& PIdotY, arr3_double& PIdotZ, + const_arr3_double& vectX, const_arr3_double& vectY, const_arr3_double& vectZ, int ns, Grid * grid); /*! Calculate the three components of mu (implicit permeattivity) cross image vector */ - void MUdot(double ***MUdotX, double ***MUdotY, double ***MUdotZ, double ***vectX, double ***vectY, double ***vectZ, Grid * grid); + void MUdot(arr3_double& MUdotX, arr3_double& MUdotY, arr3_double& MUdotZ, + const_arr3_double& vectX, const_arr3_double& vectY, const_arr3_double& vectZ, Grid * grid); /*! Calculate rho hat, Jx hat, Jy hat, Jz hat */ void calculateHatFunctions(Grid * grid, VirtualTopology3D * vct); @@ -107,9 +109,9 @@ class EMfields3D // :public Field /*! Sum current over different species */ void sumOverSpeciesJ(); /*! Smoothing after the interpolation* */ - void smooth(double value, double ***vector, int type, Grid * grid, VirtualTopology3D * vct); + void smooth(double value, arr3_double& vector, int type, Grid * grid, VirtualTopology3D * vct); /*! SPECIES: Smoothing after the interpolation for species fields* */ - void smooth(double value, double ****vector, int is, int type, Grid * grid, VirtualTopology3D * vct); + void smooth(double value, arr4_double& vector, int is, int type, Grid * grid, VirtualTopology3D * vct); /*! smooth the electric field */ void smoothE(double value, VirtualTopology3D * vct, Collective *col); @@ -145,13 +147,20 @@ class EMfields3D // :public Field /*! 
Perfect conductor boundary conditions LEFT wall */ - void perfectConductorLeft(double ***imageX, double ***imageY, double ***imageZ, double ***vectorX, double ***vectorY, double ***vectorZ, int dir, Grid * grid); + void perfectConductorLeft(arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, + const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, + int dir, Grid * grid); /*! Perfect conductor boundary conditions RIGHT wall */ - void perfectConductorRight(double ***imageX, double ***imageY, double ***imageZ, double ***vectorX, double ***vectorY, double ***vectorZ, int dir, Grid * grid); + void perfectConductorRight( + arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, + const_arr3_double& vectorX, + const_arr3_double& vectorY, + const_arr3_double& vectorZ, + int dir, Grid * grid); /*! Perfect conductor boundary conditions for source LEFT wall */ - void perfectConductorLeftS(double ***vectorX, double ***vectorY, double ***vectorZ, int dir); + void perfectConductorLeftS(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, int dir); /*! Perfect conductor boundary conditions for source RIGHT wall */ - void perfectConductorRightS(double ***vectorX, double ***vectorY, double ***vectorZ, int dir); + void perfectConductorRightS(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, int dir); /*! Calculate the sysceptibility tensor on the boundary */ void sustensorRightX(double **susxx, double **susyx, double **suszx); @@ -161,129 +170,97 @@ class EMfields3D // :public Field void sustensorRightZ(double **susxz, double **susyz, double **suszz); void sustensorLeftZ (double **susxz, double **susyz, double **suszz); + /*** accessor methods ***/ + /*! get Potential array */ - double ***getPHI(); - /*! get Electric Field component X defined on node(indexX,indexY,indexZ) */ - double &getEx(int indexX, int indexY, int indexZ) const; - /*! get Electric field X component array */ - double ***getEx(); - /*! 
get Electric field X component cell array without the ghost cells */ - double ***getExc(Grid3DCU *grid); - /*! get Electric Field component Y defined on node(indexX,indexY,indexZ) */ - double &getEy(int indexX, int indexY, int indexZ) const; - /*! get Electric field Y component array */ - double ***getEy(); - /*! get Electric field Y component cell array without the ghost cells */ - double ***getEyc(Grid3DCU *grid); - /*! get Electric Field component Z defined on node(indexX,indexY,indexZ) */ - double &getEz(int indexX, int indexY, int indexZ) const; - /*! get Electric field Z component array */ - double ***getEz(); - /*! get Electric field Z component cell array without the ghost cells */ - double ***getEzc(Grid3DCU *grid); - /*! get Magnetic Field component X defined on node(indexX,indexY,indexZ) */ - double &getBx(int indexX, int indexY, int indexZ) const; - /*! get Magnetic field X component array */ - double ***getBx(); - /*! get Magnetic field X component cell array without the ghost cells */ - double ***getBxc(); - /*! get Magnetic Field component Y defined on node(indexX,indexY,indexZ) */ - double &getBy(int indexX, int indexY, int indexZ) const; - /*! get Magnetic field Y component array */ - double ***getBy(); - /*! get Magnetic field Y component cell array without the ghost cells */ - double ***getByc(); - /*! get Magnetic Field component Z defined on node(indexX,indexY,indexZ) */ - double &getBz(int indexX, int indexY, int indexZ) const; - /*! get Magnetic field Z component array */ - double ***getBz(); - /*! get Magnetic field Z component cell array without the ghost cells */ - double ***getBzc(); - /*! get density on cell(indexX,indexY,indexZ) */ - double &getRHOc(int indexX, int indexY, int indexZ) const; - /*! get density array on center cell */ - double ***getRHOc(); - /*! get density on nodes(indexX,indexY,indexZ) */ - double &getRHOn(int indexX, int indexY, int indexZ) const; - /*! get density array on nodes */ - double ***getRHOn(); - /*! 
SPECIES: get density on nodes(indexX,indexY,indexZ) */ - double &getRHOns(int indexX, int indexY, int indexZ, int is) const; - /*! SPECIES: get density on center cell(indexX,indexY,indexZ) */ - double &getRHOcs(int indexX, int indexY, int indexZ, int is) const; - /*! SPECIES: get density array on nodes */ - double ****getRHOns(); - /*! SPECIES: get density array on cells without the ghost cells */ - double ***getRHOcs(Grid3DCU *grid, int is); - - /** get Magnetic Field component X defined on node(indexX,indexY,indexZ) */ - double &getBx_ext(int indexX, int indexY, int indexZ) const; - /** get Magnetic Field component Y defined on node(indexX,indexY,indexZ) */ - double &getBy_ext(int indexX, int indexY, int indexZ) const; - /** get Magnetic Field component Z defined on node(indexX,indexY,indexZ) */ - double &getBz_ext(int indexX, int indexY, int indexZ) const; - - /** get Magnetic Field component X */ - double ***getBx_ext(); - /** get Magnetic Field component Y */ - double ***getBy_ext(); - /** get Magnetic Field component Z */ - double ***getBz_ext(); - - /*! get pressure tensor XX for species */ - double ****getpXXsn(); - /*! get pressure tensor XY for species */ - double ****getpXYsn(); - /*! get pressure tensor XZ for species */ - double ****getpXZsn(); - /*! get pressure tensor YY for species */ - double ****getpYYsn(); - /*! get pressure tensor YZ for species */ - double ****getpYZsn(); - /*! get pressure tensor ZZ for species */ - double ****getpZZsn(); - - /*! get Jx(X,Y,Z) */ - double &getJx(int indexX, int indexY, int indexZ) const; - /*! get current -Direction X */ - double ***getJx(); - /*! get Jxs(X,Y,Z,is) */ - double &getJxs(int indexX, int indexY, int indexZ, int is) const; - /*! SPECIES: get current -Direction X */ - double ****getJxs(); - /*! SPECIES: get current X component for species is in all cells except ghost */ - double ***getJxsc(Grid3DCU *grid, int is); - /*! 
get Jy(X,Y,Z) */ - double &getJy(int indexX, int indexY, int indexZ) const; - /*! get current -Direction Y */ - double ***getJy(); - /*! get Jys(X,Y,Z,is) */ - double &getJys(int indexX, int indexY, int indexZ, int is) const; - /*! SPECIES: get current -Direction Y */ - double ****getJys(); - /*! SPECIES: get current Y component for species is in all cells except ghost */ - double ***getJysc(Grid3DCU *grid, int is); - /*! get Jz(X,Y,Z) */ - double &getJz(int indexX, int indexY, int indexZ) const; - /*! get current -Direction Z */ - double ***getJz(); - /*! get Jzs(X,Y,Z,is) */ - double &getJzs(int indexX, int indexY, int indexZ, int is) const; - /*! SPECIES: get current -Direction Z */ - double ****getJzs(); - /*! SPECIES: get current Z component for species is in all cells except ghost */ - double ***getJzsc(Grid3DCU *grid, int is); + arr3_double getPHI() {return PHI;} + + // field components defined on nodes + // + double getEx(int X, int Y, int Z) const { return Ex.get(X,Y,Z);} + double getEy(int X, int Y, int Z) const { return Ey.get(X,Y,Z);} + double getEz(int X, int Y, int Z) const { return Ez.get(X,Y,Z);} + double getBx(int X, int Y, int Z) const { return Bxn.get(X,Y,Z);} + double getBy(int X, int Y, int Z) const { return Byn.get(X,Y,Z);} + double getBz(int X, int Y, int Z) const { return Bzn.get(X,Y,Z);} + // + arr3_double getEx() { return Ex; } + arr3_double getEy() { return Ey; } + arr3_double getEz() { return Ez; } + arr3_double getBx() { return Bxn; } + arr3_double getBy() { return Byn; } + arr3_double getBz() { return Bzn; } + + // field components without ghost cells + // + void getExc(arr3_double& arr, Grid3DCU *grid); + void getEyc(arr3_double& arr, Grid3DCU *grid); + void getEzc(arr3_double& arr, Grid3DCU *grid); + void getBxc(arr3_double& arr); + void getByc(arr3_double& arr); + void getBzc(arr3_double& arr); + + arr3_double getRHOc() { return rhoc; } + arr3_double getRHOn() { return rhon; } + double getRHOc(int X, int Y, int Z) const { return 
rhoc.get(X,Y,Z);} + double getRHOn(int X, int Y, int Z) const { return rhon.get(X,Y,Z);} + + // densities per species: + // + double getRHOcs(int X,int Y,int Z,int is)const{return rhocs.get(is,X,Y,Z);} + double getRHOns(int X,int Y,int Z,int is)const{return rhons.get(is,X,Y,Z);} + arr4_double getRHOns(){return rhons;} + /* density on cells without ghost cells */ + void getRHOcs(arr3_double& arr, Grid3DCU *grid, int is); + + double getBx_ext(int X, int Y, int Z) const{return Bx_ext.get(X,Y,Z);} + double getBy_ext(int X, int Y, int Z) const{return By_ext.get(X,Y,Z);} + double getBz_ext(int X, int Y, int Z) const{return Bz_ext.get(X,Y,Z);} + + arr3_double getBx_ext() { return Bx_ext; } + arr3_double getBy_ext() { return By_ext; } + arr3_double getBz_ext() { return Bz_ext; } + + arr4_double getpXXsn() { return pXXsn; } + arr4_double getpXYsn() { return pXYsn; } + arr4_double getpXZsn() { return pXZsn; } + arr4_double getpYYsn() { return pYYsn; } + arr4_double getpYZsn() { return pYZsn; } + arr4_double getpZZsn() { return pZZsn; } + + double getJx(int X, int Y, int Z) const { return Jx.get(X,Y,Z);} + double getJy(int X, int Y, int Z) const { return Jy.get(X,Y,Z);} + double getJz(int X, int Y, int Z) const { return Jz.get(X,Y,Z);} + arr3_double getJx() { return Jx; } + arr3_double getJy() { return Jy; } + arr3_double getJz() { return Jz; } + arr4_double getJxs() { return Jxs; } + arr4_double getJys() { return Jys; } + arr4_double getJzs() { return Jzs; } + + double getJxs(int X,int Y,int Z,int is)const{return Jxs.get(is,X,Y,Z);} + double getJys(int X,int Y,int Z,int is)const{return Jys.get(is,X,Y,Z);} + double getJzs(int X,int Y,int Z,int is)const{return Jzs.get(is,X,Y,Z);} + + /*** accessor that require computing ***/ + + // get current for species in all cells except ghost + // + void getJxsc(arr3_double& arr, Grid3DCU *grid, int is); + void getJysc(arr3_double& arr, Grid3DCU *grid, int is); + void getJzsc(arr3_double& arr, Grid3DCU *grid, int is); + /*! 
get the electric field energy */ double getEenergy(); /*! get the magnetic field energy */ double getBenergy(); - /*! fetch array for summing moments of thread i */ - Moments& fetch_momentsArray(int i){ - assert_le(0,i); - assert_le(i,sizeMomentsArray); - return *momentsArray[i]; - } + /*! fetch array for summing moments of thread i */ + Moments& fetch_momentsArray(int i){ + assert_le(0,i); + assert_le(i,sizeMomentsArray); + return *momentsArray[i]; + } /*! print electromagnetic fields info */ void print(void) const; @@ -350,61 +327,61 @@ class EMfields3D // :public Field double L_square; /*! PHI: electric potential (indexX, indexY, indexZ), defined on central points between nodes */ - double ***PHI; - /*! Ex: electric field X-component (indexX, indexY, indexZ), defined on nodes */ - double ***Ex; - /*! Exth: implicit electric field X-component (indexX, indexY, indexZ), defined on nodes */ - double ***Exth; - /*! Ey: electric field Y-component (indexX, indexY, indexZ), defined on nodes */ - double ***Ey; - /*! Eyth: implicit electric field Y-component (indexX, indexY, indexZ), defined on nodes */ - double ***Eyth; - /*! Ez: electric field Z-component (indexX, indexY, indexZ, #species), defined on nodes */ - double ***Ez; - /*! Ezth: implicit electric field Z-component (indexX, indexY, indexZ), defined on nodes */ - double ***Ezth; - /*! Bxc: magnetic field X-component (indexX, indexY, indexZ), defined on central points between nodes */ - double ***Bxc; - /*! Byc: magnetic field Y-component (indexX, indexY, indexZ), defined on central points between nodes */ - double ***Byc; - /*! Bzc: magnetic field Z-component (indexX, indexY, indexZ), defined on central points between nodes */ - double ***Bzc; - /*! Bxn: magnetic field X-component (indexX, indexY, indexZ), defined on nodes */ - double ***Bxn; - /*! Byn: magnetic field Y-component (indexX, indexY, indexZ), defined on nodes */ - double ***Byn; - /*! 
Bzn: magnetic field Z-component (indexX, indexY, indexZ), defined on nodes */ - double ***Bzn; + array3_double PHI; + + // Electric field components defined on nodes + // + array3_double Ex; + array3_double Ey; + array3_double Ez; + + // implicit electric field components defined on nodes + // + array3_double Exth; + array3_double Eyth; + array3_double Ezth; + + // magnetic field components defined on central points between nodes + // + array3_double Bxc; + array3_double Byc; + array3_double Bzc; + + // magnetic field components defined on nodes + // + array3_double Bxn; + array3_double Byn; + array3_double Bzn; // ************************************* // TEMPORARY ARRAY // ************************************ /*!some temporary arrays (for calculate hat functions) */ - double ***tempXC; - double ***tempYC; - double ***tempZC; - double ***tempXN; - double ***tempYN; - double ***tempZN; + array3_double tempXC; + array3_double tempYC; + array3_double tempZC; + array3_double tempXN; + array3_double tempYN; + array3_double tempZN; /*! other temporary arrays (in MaxwellSource) */ - double ***tempC; - double ***tempX; - double ***tempY; - double ***tempZ; - double ***temp2X; - double ***temp2Y; - double ***temp2Z; + array3_double tempC; + array3_double tempX; + array3_double tempY; + array3_double tempZ; + array3_double temp2X; + array3_double temp2Y; + array3_double temp2Z; /*! 
and some for MaxwellImage */ - double ***imageX; - double ***imageY; - double ***imageZ; - double ***Dx; - double ***Dy; - double ***Dz; - double ***vectX; - double ***vectY; - double ***vectZ; - double ***divC; + array3_double imageX; + array3_double imageY; + array3_double imageZ; + array3_double Dx; + array3_double Dy; + array3_double Dz; + array3_double vectX; + array3_double vectY; + array3_double vectZ; + array3_double divC; /* temporary arrays for summing moments */ int sizeMomentsArray; Moments **momentsArray; @@ -415,87 +392,78 @@ class EMfields3D // :public Field // ******************************************************************************* /*! Charge density, defined on central points of the cell */ - double ***rhoc; + array3_double rhoc; /*! Charge density, defined on nodes */ - double ***rhon; + array3_double rhon; /*! Implicit charge density, defined on central points of the cell */ - double ***rhoh; + array3_double rhoh; /*! SPECIES: charge density for each species, defined on nodes */ - double ****rhons; + array4_double rhons; /*! SPECIES: charge density for each species, defined on central points of the cell */ - double ****rhocs; - /*! Current density component-X, defined on nodes */ - double ***Jx; - /*! Current density component-Y, defined on nodes */ - double ***Jy; - /*! Current density component-Z, defined on nodes */ - double ***Jz; - /*! Implicit current density X-component, defined on nodes */ - double ***Jxh; - /*! Implicit current density Y-component, defined on nodes */ - double ***Jyh; - /*! Implicit current density Z-component, defined on nodes */ - double ***Jzh; - /*! SPECIES: current density component-X for species, defined on nodes */ - double ****Jxs; - /*! SPECIES: current density component-Y for species, defined on nodes */ - double ****Jys; - /*! SPECIES: current density component-Z for species, defined on nodes */ - double ****Jzs; - /*! External magnetic field component-X, defined on nodes */ - double*** Bx_ext; - /*! 
External magnetic field component-Y, defined on nodes */ - double*** By_ext; - /*! External magnetic field component-Z, defined on nodes */ - double*** Bz_ext; - /*! External current field component-X, defined on nodes */ - double*** Jx_ext; - /*! External current field component-Y, defined on nodes */ - double*** Jy_ext; - /*! External current field component-Z, defined on nodes */ - double*** Jz_ext; - - /*! SPECIES: pressure tensor component-XX, defined on nodes */ - double ****pXXsn; - /*! SPECIES: pressure tensor component-XY, defined on nodes */ - double ****pXYsn; - /*! SPECIES: pressure tensor component-XZ, defined on nodes */ - double ****pXZsn; - /*! SPECIES: pressure tensor component-XZ, defined on nodes */ - double ****pYYsn; - /*! SPECIES: pressure tensor component-YZ, defined on nodes */ - double ****pYZsn; - /*! SPECIES: pressure tensor component-ZZ, defined on nodes */ - double ****pZZsn; - - - /*! Field Boundary Condition 0 = Dirichlet Boundary Condition: specifies the value to take on the boundary of the domain 1 = Neumann Boundary Condition: specifies the value of derivative to take on the boundary of the domain 2 = Periodic condition */ - - /*! 
Boundary Condition Electrostatic Potential: FaceXright */ + array4_double rhocs; + + // current density defined on nodes + // + array3_double Jx; + array3_double Jy; + array3_double Jz; + + // implicit current density defined on nodes + // + array3_double Jxh; + array3_double Jyh; + array3_double Jzh; + + // species-specific current densities defined on nodes + // + array4_double Jxs; + array4_double Jys; + array4_double Jzs; + + // magnetic field components defined on nodes + // + array3_double Bx_ext; + array3_double By_ext; + array3_double Bz_ext; + + // external current, defined on nodes + array3_double Jx_ext; + array3_double Jy_ext; + array3_double Jz_ext; + + // pressure tensor components, defined on nodes + array4_double pXXsn; + array4_double pXYsn; + array4_double pXZsn; + array4_double pYYsn; + array4_double pYZsn; + array4_double pZZsn; + + /*! Field Boundary Condition + 0 = Dirichlet Boundary Condition: specifies the + value on the boundary of the domain + 1 = Neumann Boundary Condition: specifies the value of + derivative on the boundary of the domain + 2 = Periodic boundary condition */ + + // boundary conditions for electrostatic potential + // int bcPHIfaceXright; - /*! Boundary Condition Electrostatic Potential:FaceXleft */ int bcPHIfaceXleft; - /*! Boundary Condition Electrostatic Potential:FaceYright */ int bcPHIfaceYright; - /*! Boundary Condition Electrostatic Potential:FaceYleft */ int bcPHIfaceYleft; - /*! Boundary Condition Electrostatic Potential:FaceZright */ int bcPHIfaceZright; - /*! Boundary Condition Electrostatic Potential:FaceZleft */ int bcPHIfaceZleft; /*! Boundary condition for electric field 0 = perfect conductor 1 = magnetic mirror */ - /*! Boundary Condition EM Field: FaceXright */ + // + // boundary conditions for EM field + // int bcEMfaceXright; - /*! Boundary Condition EM Field: FaceXleft */ int bcEMfaceXleft; - /*! Boundary Condition EM Field: FaceYright */ int bcEMfaceYright; - /*! 
Boundary Condition EM Field: FaceYleft */ int bcEMfaceYleft; - /*! Boundary Condition EM Field: FaceZright */ int bcEMfaceZright; - /*! Boundary Condition EM Field: FaceZleft */ int bcEMfaceZleft; @@ -529,10 +497,13 @@ class EMfields3D // :public Field injInfoFields* get_InfoFieldsRear(); injInfoFields* get_InfoFieldsRight(); - void BoundaryConditionsB(double ***vectorX, double ***vectorY, double ***vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct); - void BoundaryConditionsE(double ***vectorX, double ***vectorY, double ***vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct); - void BoundaryConditionsEImage(double ***imageX, double ***imageY, double ***imageZ,double ***vectorX, double ***vectorY, double ***vectorZ,int nx, int ny, int nz, VirtualTopology3D *vct,Grid *grid); - + void BoundaryConditionsB(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, + int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct); + void BoundaryConditionsE(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, + int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct); + void BoundaryConditionsEImage(arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, + const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, + int nx, int ny, int nz, VirtualTopology3D *vct,Grid *grid); }; inline void EMfields3D::addRho(double weight[][2][2], int X, int Y, int Z, int is) { diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index 9d2e2f01..c8cd883d 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -42,44 +42,76 @@ class Grid3DCU // :public Grid /** print grid info */ void print(VirtualTopology3D * ptVCT); /** calculate a derivative along a direction on nodes */ - void derivN(double ***derN, double ****scFieldC, int ns, int dir); + void derivN(arr3_double& derN, + const_arr4_double& scFieldC, int ns, int dir); /** calculate gradient on nodes, given a scalar field defined on central points */ - 
void gradC2N(double ***gradXN, double ***gradYN, double ***gradZN, double ***scFieldC); + void gradC2N(arr3_double& gradXN, arr3_double& gradYN, arr3_double& gradZN, + const_arr3_double& scFieldC); /** calculate gradient on nodes, given a scalar field defined on central points */ - void gradN2C(double ***gradXC, double ***gradYC, double ***gradZC, double ***scFieldN); + void gradN2C(arr3_double& gradXC, arr3_double& gradYC, arr3_double& gradZC, + const_arr3_double& scFieldN); /** calculate divergence on central points, given a vector field defined on nodes */ - void divN2C(double ***divC, double ***vecFieldXN, double ***vecFieldYN, double ***vecFieldZN); + void divN2C(arr3_double& divC, + const_arr3_double& vecFieldXN, + const_arr3_double& vecFieldYN, + const_arr3_double& vecFieldZN); /** calculate divergence on nodes, given a vector field defined on central points */ - void divC2N(double ***divN, double ***vecFieldXC, double ***vecFieldYC, double ***vecFieldZC); + void divC2N(arr3_double& divN, + const_arr3_double& vecFieldXC, + const_arr3_double& vecFieldYC, + const_arr3_double& vecFieldZC); /** calculate curl on nodes, given a vector field defined on central points */ - void curlC2N(double ***curlXN, double ***curlYN, double ***curlZN, double ***vecFieldXC, double ***vecFieldYC, double ***vecFieldZC); + void curlC2N(arr3_double& curlXN, arr3_double& curlYN, + arr3_double& curlZN, + const_arr3_double& vecFieldXC, + const_arr3_double& vecFieldYC, + const_arr3_double& vecFieldZC); /** calculate curl on central points, given a vector field defined on nodes */ - void curlN2C(double ***curlXC, double ***curlYC, double ***curlZC, double ***vecFieldXN, double ***vecFieldYN, double ***vecFieldZN); + void curlN2C(arr3_double& curlXC, arr3_double& curlYC, arr3_double& curlZC, + const_arr3_double& vecFieldXN, + const_arr3_double& vecFieldYN, + const_arr3_double& vecFieldZN); /** calculate divergence on central points, given a Tensor field defined on nodes */ - void 
divSymmTensorN2C(double ***divCX, double ***divCY, double ***divCZ, double ****pXX, double ****pXY, double ****pXZ, double ****pYY, double ****pYZ, double ****pZZ, int ns); + void divSymmTensorN2C(arr3_double& divCX, arr3_double& divCY, arr3_double& divCZ, + const_arr4_double& pXX, + const_arr4_double& pXY, + const_arr4_double& pXZ, + const_arr4_double& pYY, + const_arr4_double& pYZ, + const_arr4_double& pZZ, int ns); /** calculate laplacian on nodes, given a scalar field defined on nodes */ - void lapN2N(double ***lapN, double ***scFieldN, VirtualTopology3D * vct); + void lapN2N(arr3_double& lapN, + const_arr3_double& scFieldN, VirtualTopology3D * vct); /** calculate laplacian on central points, given a scalar field defined on central points for Poisson */ - void lapC2Cpoisson(double ***lapC, double ***scFieldC, VirtualTopology3D * vct); + void lapC2Cpoisson(arr3_double& lapC, + arr3_double& scFieldC, VirtualTopology3D * vct); /** calculate laplacian on central points, given a scalar field defined on central points */ - void lapC2C(double ***lapC, double ***scFieldC, VirtualTopology3D * vct); + void lapC2C(arr3_double& lapC, + const_arr3_double& scFieldC, VirtualTopology3D * vct); /** calculate divergence on boundaries */ - void divBCleft(double ***divBC, double ***vectorX, double ***vectorY, double ***vectorZ, int leftActiveNode, int dirDER); + void divBCleft(arr3_double& divBC, + const_arr3_double& vectorX, + const_arr3_double& vectorY, + const_arr3_double& vectorZ, int leftActiveNode, int dirDER); /** calculate divergence on boundaries */ - void divBCright(double ***divBC, double ***vectorX, double ***vectorY, double ***vectorZ, int rightActiveNode, int dirDER); + void divBCright(arr3_double& divBC, + const_arr3_double& vectorX, + const_arr3_double& vectorY, + const_arr3_double& vectorZ, int rightActiveNode, int dirDER); /** calculate derivative on boundaries */ - void derBC(double ***derBC, double ***vector, int leftActiveNode, int dirDER); + void 
derBC(arr3_double& derBC, + const_arr3_double& vector, int leftActiveNode, int dirDER); /** interpolate on nodes from central points */ - void interpC2N(double ***vecFieldN, double ***vecFieldC); + void interpC2N(arr3_double& vecFieldN, const_arr3_double& vecFieldC); /** interpolate on central points from nodes */ - void interpN2C(double ***vecFieldC, double ***vecFieldN); + void interpN2C(arr3_double& vecFieldC, const_arr3_double& vecFieldN); /** interpolate on central points from nodes */ - void interpN2C(double ****vecFieldC, int ns, double ****vecFieldN); + void interpN2C(arr4_double& vecFieldC, int ns, const_arr4_double& vecFieldN); // /////////// PRIVATE VARIABLES ////////////// private: diff --git a/include/Moments.h b/include/Moments.h index 53fe942f..981ee15b 100644 --- a/include/Moments.h +++ b/include/Moments.h @@ -6,20 +6,20 @@ // class Moments { private: - doubleArr3 rho; + arr3_double rho; /** current density, defined on nodes */ - doubleArr3 Jx; - doubleArr3 Jy; - doubleArr3 Jz; + arr3_double Jx; + arr3_double Jy; + arr3_double Jz; /** pressure tensor components, defined on nodes */ - doubleArr3 pXX; - doubleArr3 pXY; - doubleArr3 pXZ; - doubleArr3 pYY; - doubleArr3 pYZ; - doubleArr3 pZZ; + arr3_double pXX; + arr3_double pXY; + arr3_double pXZ; + arr3_double pYY; + arr3_double pYZ; + arr3_double pZZ; int nx; int ny; int nz; @@ -39,16 +39,16 @@ class Moments { double get_pYZ(int i, int j, int k) const { return pYZ.get(i,j,k); } double get_pZZ(int i, int j, int k) const { return pZZ.get(i,j,k); } // fetch accessors (write access) - doubleArr3& fetch_rho() { return rho; } - doubleArr3& fetch_Jx () { return Jx ; } - doubleArr3& fetch_Jy () { return Jy ; } - doubleArr3& fetch_Jz () { return Jz ; } - doubleArr3& fetch_Pxx() { return pXX; } - doubleArr3& fetch_Pxy() { return pXY; } - doubleArr3& fetch_Pxz() { return pXZ; } - doubleArr3& fetch_Pyy() { return pYY; } - doubleArr3& fetch_Pyz() { return pYZ; } - doubleArr3& fetch_Pzz() { return pZZ; } + 
arr3_double& fetch_rho() { return rho; } + arr3_double& fetch_Jx () { return Jx ; } + arr3_double& fetch_Jy () { return Jy ; } + arr3_double& fetch_Jz () { return Jz ; } + arr3_double& fetch_Pxx() { return pXX; } + arr3_double& fetch_Pxy() { return pXY; } + arr3_double& fetch_Pxz() { return pXZ; } + arr3_double& fetch_Pyy() { return pYY; } + arr3_double& fetch_Pyz() { return pYZ; } + arr3_double& fetch_Pzz() { return pZZ; } public: Moments(int nxn, int nyn, int nzn) : nx(nxn), diff --git a/include/PSKhdf5adaptor.h b/include/PSKhdf5adaptor.h index 88b3712a..b9faf69c 100644 --- a/include/PSKhdf5adaptor.h +++ b/include/PSKhdf5adaptor.h @@ -9,6 +9,7 @@ #include "hdf5.h" #include "hdf5_hl.h" +#include "arraysfwd.h" namespace PSK { @@ -50,12 +51,12 @@ namespace PSK { void write(const std::string & objname, double d); void write(const std::string & objname, const Dimens dimens, const double *d_array); void write(const std::string & objname, const Dimens dimens, const std::vector < double >&d_array); - void write(const std::string & objname, const Dimens dimens, double ***d_array); - void write(const std::string & objname, const Dimens dimens, const int i, double ****d_array); + void write(const std::string & objname, const Dimens dimens, const_arr3_double d_array); + void write(const std::string & objname, const Dimens dimens, const int i, const_arr4_double d_array); void write(const std::string & objname, const Dimens dimens, double **d_array); - void write(const std::string & objname, const Dimens dimens, const int i, double ***d_array); + void write(const std::string & objname, const Dimens dimens, const int i, const_arr3_double d_array); }; diff --git a/include/TransArraySpace3D.h b/include/TransArraySpace3D.h index 43483fa4..c83710f3 100644 --- a/include/TransArraySpace3D.h +++ b/include/TransArraySpace3D.h @@ -11,7 +11,7 @@ developers : Stefano Markidis, Giovanni Lapenta #define TransArraySpace3D_H /** method to convert a 1D field in a 3D field not considering guard 
cells*/ -inline void solver2phys(double ***vectPhys, double *vectSolver, int nx, int ny, int nz) { +inline void solver2phys(arr3_double& vectPhys, double *vectSolver, int nx, int ny, int nz) { for (register int i = 1; i < nx - 1; i++) for (register int j = 1; j < ny - 1; j++) for (register int k = 1; k < nz - 1; k++) @@ -19,7 +19,7 @@ inline void solver2phys(double ***vectPhys, double *vectSolver, int nx, int ny, } /** method to convert a 1D field in a 3D field not considering guard cells*/ -inline void solver2phys(double ***vectPhys1, double ***vectPhys2, double ***vectPhys3, double *vectSolver, int nx, int ny, int nz) { +inline void solver2phys(arr3_double& vectPhys1, arr3_double& vectPhys2, arr3_double& vectPhys3, double *vectSolver, int nx, int ny, int nz) { for (register int i = 1; i < nx - 1; i++) for (register int j = 1; j < ny - 1; j++) for (register int k = 1; k < nz - 1; k++) { @@ -29,23 +29,20 @@ inline void solver2phys(double ***vectPhys1, double ***vectPhys2, double ***vect } } /** method to convert a 3D field in a 1D field not considering guard cells*/ -inline void phys2solver(double *vectSolver, double ***vectPhys, int nx, int ny, int nz) { +inline void phys2solver(double *vectSolver, const arr3_double& vectPhys, int nx, int ny, int nz) { for (register int i = 1; i < nx - 1; i++) for (register int j = 1; j < ny - 1; j++) for (register int k = 1; k < nz - 1; k++) - *vectSolver++ = vectPhys[i][j][k]; - - + *vectSolver++ = vectPhys.get(i,j,k); } /** method to convert a 3D field in a 1D field not considering guard cells*/ -inline void phys2solver(double *vectSolver, double ***vectPhys1, double ***vectPhys2, double ***vectPhys3, int nx, int ny, int nz) { +inline void phys2solver(double *vectSolver, const arr3_double& vectPhys1, const arr3_double& vectPhys2, const arr3_double& vectPhys3, int nx, int ny, int nz) { for (register int i = 1; i < nx - 1; i++) for (register int j = 1; j < ny - 1; j++) for (register int k = 1; k < nz - 1; k++) { - *vectSolver++ = 
vectPhys1[i][j][k]; - *vectSolver++ = vectPhys2[i][j][k]; - *vectSolver++ = vectPhys3[i][j][k]; + *vectSolver++ = vectPhys1.get(i,j,k); + *vectSolver++ = vectPhys2.get(i,j,k); + *vectSolver++ = vectPhys3.get(i,j,k); } - } #endif diff --git a/include/arraysfwd.h b/include/arraysfwd.h new file mode 100644 index 00000000..41fdbc19 --- /dev/null +++ b/include/arraysfwd.h @@ -0,0 +1,52 @@ +/* forward declaration for array classes */ +#ifndef arraysfwd_h +#define arraysfwd_h + +namespace iPic3D +{ + template + class const_array_ref3; + template + class const_array_ref4; + template + class array_ref1; + template + class array_ref2; + template + class array_ref3; + template + class array_ref4; + template + class array1; + template + class array2; + template + class array3; + template + class array4; +} + +// These aliases are defined for the following flexibilization purposes: +// - to avoid filling the code with template brackets +// (i.e., to minimize explicitly template-dependent code). +// - so that they can be redefined according to the user's +// preferred array implementation. 
+// +//typedef array_ref1 intArr1; +//typedef array_ref2 intArr2; +//typedef array_ref3 intArr3; +//typedef array_ref4 intArr4; +//typedef const_array_ref1 arr1_double; +//typedef const_array_ref2 arr2_double; +// +typedef iPic3D::const_array_ref3 const_arr3_double; +typedef iPic3D::const_array_ref4 const_arr4_double; +typedef iPic3D::array_ref1 arr1_double; +typedef iPic3D::array_ref2 arr2_double; +typedef iPic3D::array_ref3 arr3_double; +typedef iPic3D::array_ref4 arr4_double; +typedef iPic3D::array1 array1_double; +typedef iPic3D::array2 array2_double; +typedef iPic3D::array3 array3_double; +typedef iPic3D::array4 array4_double; +#endif diff --git a/include/phdf5.h b/include/phdf5.h index a6465e98..39dba569 100644 --- a/include/phdf5.h +++ b/include/phdf5.h @@ -9,6 +9,7 @@ using namespace std; #include "mpi.h" #include "hdf5.h" #include "hdf5_hl.h" +#include "arraysfwd.h" class PHDF5fileClass{ @@ -21,9 +22,9 @@ class PHDF5fileClass{ void CreatePHDF5file(double *L, int *dglob, int *dlocl, bool bp); void ClosePHDF5file(); void OpenPHDF5file(); - void ReadPHDF5dataset_double(string dataset, double ***data); + void ReadPHDF5dataset_double(string dataset, arr3_double& data); void ReadPHDF5param(); - int WritePHDF5dataset(string grpname, string datasetname, double ***data, int nx, int ny, int nz); + int WritePHDF5dataset(string grpname, string datasetname, const_arr3_double& data, int nx, int ny, int nz); int getPHDF5ndim(); int getPHDF5ncx(); diff --git a/inputoutput/Collective.cpp b/inputoutput/Collective.cpp index 9f11e3de..140e1647 100644 --- a/inputoutput/Collective.cpp +++ b/inputoutput/Collective.cpp @@ -1,6 +1,7 @@ #include #include "Collective.h" +#include "debug.h" /*! 
Read the input file from text file and put the data in a collective wrapper: if it's a restart read from input file basic sim data and load particles and EM field from restart file */ void Collective::ReadInput(string inputfile) { diff --git a/inputoutput/WriteOutputParallel.cpp b/inputoutput/WriteOutputParallel.cpp index 6de0d2ea..c024a069 100644 --- a/inputoutput/WriteOutputParallel.cpp +++ b/inputoutput/WriteOutputParallel.cpp @@ -41,17 +41,22 @@ void WriteOutputParallel(Grid3DCU *grid, EMfields3D *EMf, CollectiveIO *col, VCt /* Write the Electric field */ /* ------------------------ */ + array3_double arr3(nxc-2,nyc-2,nzc-2); + grpname = "Fields"; dtaname = "Ex"; - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getExc(grid), nxc-2, nyc-2, nzc-2); + EMf->getExc(arr3,grid); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); grpname = "Fields"; dtaname = "Ey"; - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getEyc(grid), nxc-2, nyc-2, nzc-2); + EMf->getEyc(arr3,grid); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); grpname = "Fields"; dtaname = "Ez"; - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getEzc(grid), nxc-2, nyc-2, nzc-2); + EMf->getEzc(arr3,grid); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); /* ------------------------ */ /* Write the Magnetic field */ @@ -59,15 +64,18 @@ void WriteOutputParallel(Grid3DCU *grid, EMfields3D *EMf, CollectiveIO *col, VCt grpname = "Fields"; dtaname = "Bx"; - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getBxc(), nxc-2, nyc-2, nzc-2); + EMf->getBxc(arr3); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); grpname = "Fields"; dtaname = "By"; - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getByc(), nxc-2, nyc-2, nzc-2); + EMf->getByc(arr3); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); grpname = "Fields"; dtaname = "Bz"; - 
outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getBzc(), nxc-2, nyc-2, nzc-2); + EMf->getBzc(arr3); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); /* ----------------------------------------------- */ /* Write the Charge Density field for each species */ @@ -80,7 +88,9 @@ void WriteOutputParallel(Grid3DCU *grid, EMfields3D *EMf, CollectiveIO *col, VCt grpname = "Fields"; dtaname = "Rho_" + snmbr.str(); - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getRHOcs(grid, is), nxc-2, nyc-2, nzc-2); + EMf->getRHOcs(arr3,grid, is); + EMf->getRHOcs(arr3, grid, is); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); } /* ---------------------------------------- */ @@ -94,15 +104,18 @@ void WriteOutputParallel(Grid3DCU *grid, EMfields3D *EMf, CollectiveIO *col, VCt grpname = "Fields"; dtaname = "Jx_" + snmbr.str(); - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getJxsc(grid, is), nxc-2, nyc-2, nzc-2); + EMf->getJxsc(arr3, grid, is); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); grpname = "Fields"; dtaname = "Jy_" + snmbr.str(); - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getJysc(grid, is), nxc-2, nyc-2, nzc-2); + EMf->getJysc(arr3, grid, is); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); grpname = "Fields"; dtaname = "Jz_" + snmbr.str(); - outputfile.WritePHDF5dataset(grpname, dtaname, EMf->getJzsc(grid, is), nxc-2, nyc-2, nzc-2); + EMf->getJzsc(arr3, grid, is); + outputfile.WritePHDF5dataset(grpname, dtaname, arr3, nxc-2, nyc-2, nzc-2); } outputfile.ClosePHDF5file(); diff --git a/inputoutput/phdf5.cpp b/inputoutput/phdf5.cpp index 3eb39397..5b8368ab 100644 --- a/inputoutput/phdf5.cpp +++ b/inputoutput/phdf5.cpp @@ -3,6 +3,7 @@ #include "phdf5.h" #include "ipicdefs.h" #include "errors.h" +#include "Alloc.h" PHDF5fileClass::PHDF5fileClass(string filestr, int nd, int *coord, MPI_Comm mpicomm){ @@ -111,7 +112,7 @@ void 
PHDF5fileClass::ClosePHDF5file(){ } -int PHDF5fileClass::WritePHDF5dataset(string grpname, string datasetname, double ***data, int nx, int ny, int nz){ +int PHDF5fileClass::WritePHDF5dataset(string grpname, string datasetname, const_arr3_double& data, int nx, int ny, int nz){ /* -------------------------- */ /* Local variables and arrays */ @@ -265,7 +266,7 @@ void PHDF5fileClass::ReadPHDF5param(){ } -void PHDF5fileClass::ReadPHDF5dataset_double(string datasetname, double ***data){ +void PHDF5fileClass::ReadPHDF5dataset_double(string datasetname, arr3_double& data){ herr_t status; double *filedata; diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index b05ad584..b0cb1426 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -2,6 +2,7 @@ #include "iPic3D.h" #include "TimeTasks.h" #include "ipicdefs.h" +#include "debug.h" using namespace iPic3D; MPIdata* iPic3D::c_Solver::mpi=0; diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 7a4d01d0..469fff44 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -316,16 +316,12 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; } double start_mover_PC = MPI_Wtime(); - double ***Ex = EMf->getEx(); - double ***Ey = EMf->getEy(); - double ***Ez = EMf->getEz(); - double ***Bx = EMf->getBx(); - double ***By = EMf->getBy(); - double ***Bz = EMf->getBz(); - - double ***Bx_ext = EMf->getBx_ext(); - double ***By_ext = EMf->getBy_ext(); - double ***Bz_ext = EMf->getBz_ext(); + const_arr3_double Ex = EMf->getEx(); + const_arr3_double Ey = EMf->getEy(); + const_arr3_double Ez = EMf->getEz(); + const_arr3_double Bx = EMf->getBx(); + const_arr3_double By = EMf->getBy(); + const_arr3_double Bz = EMf->getBz(); const double dto2 = .5 * dt, qomdt2 = qom * dto2 / c; const double inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; @@ -423,32 +419,32 @@ int 
Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { const double weight110 = xi[1] * eta[1] * zeta[0] * invVOL; const double weight111 = xi[1] * eta[1] * zeta[1] * invVOL; // - Bxl += weight000 * Bx[ix][iy][iz] + Bx_ext[ix][iy][iz]; - Bxl += weight001 * Bx[ix][iy][iz - 1] + Bx_ext[ix][iy][iz-1]; - Bxl += weight010 * Bx[ix][iy - 1][iz] + Bx_ext[ix][iy-1][iz]; - Bxl += weight011 * Bx[ix][iy - 1][iz - 1] + Bx_ext[ix][iy-1][iz-1]; - Bxl += weight100 * Bx[ix - 1][iy][iz] + Bx_ext[ix-1][iy][iz]; - Bxl += weight101 * Bx[ix - 1][iy][iz - 1] + Bx_ext[ix-1][iy][iz-1]; - Bxl += weight110 * Bx[ix - 1][iy - 1][iz] + Bx_ext[ix-1][iy-1][iz]; - Bxl += weight111 * Bx[ix - 1][iy - 1][iz - 1] + Bx_ext[ix-1][iy-1][iz-1]; + Bxl += weight000 * Bx[ix][iy][iz]; + Bxl += weight001 * Bx[ix][iy][iz - 1]; + Bxl += weight010 * Bx[ix][iy - 1][iz]; + Bxl += weight011 * Bx[ix][iy - 1][iz - 1]; + Bxl += weight100 * Bx[ix - 1][iy][iz]; + Bxl += weight101 * Bx[ix - 1][iy][iz - 1]; + Bxl += weight110 * Bx[ix - 1][iy - 1][iz]; + Bxl += weight111 * Bx[ix - 1][iy - 1][iz - 1]; // - Byl += weight000 * By[ix][iy][iz] + By_ext[ix][iy][iz]; - Byl += weight001 * By[ix][iy][iz - 1] + By_ext[ix][iy][iz-1]; - Byl += weight010 * By[ix][iy - 1][iz] + By_ext[ix][iy-1][iz]; - Byl += weight011 * By[ix][iy - 1][iz - 1] + By_ext[ix][iy-1][iz-1]; - Byl += weight100 * By[ix - 1][iy][iz] + By_ext[ix-1][iy][iz]; - Byl += weight101 * By[ix - 1][iy][iz - 1] + By_ext[ix-1][iy][iz-1]; - Byl += weight110 * By[ix - 1][iy - 1][iz] + By_ext[ix-1][iy-1][iz]; - Byl += weight111 * By[ix - 1][iy - 1][iz - 1] + By_ext[ix-1][iy-1][iz-1]; + Byl += weight000 * By[ix][iy][iz]; + Byl += weight001 * By[ix][iy][iz - 1]; + Byl += weight010 * By[ix][iy - 1][iz]; + Byl += weight011 * By[ix][iy - 1][iz - 1]; + Byl += weight100 * By[ix - 1][iy][iz]; + Byl += weight101 * By[ix - 1][iy][iz - 1]; + Byl += weight110 * By[ix - 1][iy - 1][iz]; + Byl += weight111 * By[ix - 1][iy - 1][iz - 1]; // - Bzl += weight000 * Bz[ix][iy][iz] 
+ Bz_ext[ix][iy][iz]; - Bzl += weight001 * Bz[ix][iy][iz - 1] + Bz_ext[ix][iy][iz-1]; - Bzl += weight010 * Bz[ix][iy - 1][iz] + Bz_ext[ix][iy-1][iz]; - Bzl += weight011 * Bz[ix][iy - 1][iz - 1] + Bz_ext[ix][iy-1][iz-1]; - Bzl += weight100 * Bz[ix - 1][iy][iz] + Bz_ext[ix-1][iy][iz]; - Bzl += weight101 * Bz[ix - 1][iy][iz - 1] + Bz_ext[ix-1][iy][iz-1]; - Bzl += weight110 * Bz[ix - 1][iy - 1][iz] + Bz_ext[ix-1][iy-1][iz]; - Bzl += weight111 * Bz[ix - 1][iy - 1][iz - 1] + Bz_ext[ix-1][iy-1][iz-1]; + Bzl += weight000 * Bz[ix][iy][iz]; + Bzl += weight001 * Bz[ix][iy][iz - 1]; + Bzl += weight010 * Bz[ix][iy - 1][iz]; + Bzl += weight011 * Bz[ix][iy - 1][iz - 1]; + Bzl += weight100 * Bz[ix - 1][iy][iz]; + Bzl += weight101 * Bz[ix - 1][iy][iz - 1]; + Bzl += weight110 * Bz[ix - 1][iy - 1][iz]; + Bzl += weight111 * Bz[ix - 1][iy - 1][iz - 1]; // Exl += weight000 * Ex[ix][iy][iz]; Exl += weight001 * Ex[ix][iy][iz - 1]; diff --git a/tests/Makefile b/tests/Makefile index 10559ab9..01a27091 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -6,9 +6,10 @@ OBJECTS = \ ../utility/asserts.o \ debug.o -FLAGS = -O3 -DNO_MPI -fno-exceptions #-DCHECK_BOUNDS -ggdb +ARRAY_FLAGS = #-DCHAINED_ARRAYS #-DFLAT_ARRAYS #-DCHECK_BOUNDS +FLAGS = -O3 -DNO_MPI -fno-exceptions $(ARRAY_FLAGS) #-ggdb -COMPILER = g++ #icpc # g++ +COMPILER = c++ #icpc # g++ test: clean_test_arrays test_arrays diff --git a/tests/test_arrays.cpp b/tests/test_arrays.cpp index 45cdc92c..bfc7aa66 100644 --- a/tests/test_arrays.cpp +++ b/tests/test_arrays.cpp @@ -336,9 +336,9 @@ void testArr2_diagonal() type** Bold = newArr2(type, dim1, dim2); type** Cold = newArr2(type, dim1, dim2); - Arr2 Aarr(dim1, dim2); - Arr2 Barr(dim1, dim2); - Arr2 Carr(dim1, dim2); + array_ref2 Aarr(dim1, dim2); + array_ref2 Barr(dim1, dim2); + array_ref2 Carr(dim1, dim2); printf("Initializing data ...\n"); for(size_t i=0; i Aarr(dim1, dim2); - Arr2 Barr(dim1, dim2); - Arr2 Carr(dim1, dim2); + array_ref2 Aarr(dim1, dim2); + array_ref2 Barr(dim1, 
dim2); + array_ref2 Carr(dim1, dim2); printf("Initializing data ...\n"); for(size_t i=0; i +void set_prod3(array_ref3 Aarr,const_arr3 Barr,array_ref3 Carr,int ITERS, size_t dim1,size_t dim2,size_t dim3) +{ + for(int t=0; t void testArr3() { @@ -562,12 +576,12 @@ void testArr3() type*** Bold = newArr3(type, dim1, dim2, dim3); type*** Cold = newArr3(type, dim1, dim2, dim3); - //Array3 Aarr(dim1, dim2, dim3); - //Array3 Barr(dim1, dim2, dim3); - //Array3 Carr(dim1, dim2, dim3); - Arr3 Aarr(dim1, dim2, dim3); - Arr3 Barr(dim1, dim2, dim3); - Arr3 Carr(dim1, dim2, dim3); + //array3 Aarr(dim1, dim2, dim3); + //array3 Barr(dim1, dim2, dim3); + //array3 Carr(dim1, dim2, dim3); + array_ref3 Aarr(dim1, dim2, dim3); + array_ref3 Barr(dim1, dim2, dim3); + array_ref3 Carr(dim1, dim2, dim3); printf("Initializing data ...\n"); for(size_t i=0; i Afbr(dim1, dim2, dim3, dim4); - //Array4 Bfbr(dim1, dim2, dim3, dim4); - //Array4 Cfbr(dim1, dim2, dim3, dim4); + //array4 Afbr(dim1, dim2, dim3, dim4); + //array4 Bfbr(dim1, dim2, dim3, dim4); + //array4 Cfbr(dim1, dim2, dim3, dim4); - //Array4 Afpa(dim1, dim2, dim3, dim4); - //Array4 Bfpa(dim1, dim2, dim3, dim4); - //Array4 Cfpa(dim1, dim2, dim3, dim4); + //array4 Afpa(dim1, dim2, dim3, dim4); + //array4 Bfpa(dim1, dim2, dim3, dim4); + //array4 Cfpa(dim1, dim2, dim3, dim4); - Arr4 Abra(dim1, dim2, dim3, dim4); - Arr4 Bbra(dim1, dim2, dim3, dim4); - Arr4 Cbra(dim1, dim2, dim3, dim4); + array_ref4 Abra(dim1, dim2, dim3, dim4); + array_ref4 Bbra(dim1, dim2, dim3, dim4); + array_ref4 Cbra(dim1, dim2, dim3, dim4); - Arr4 Apar(dim1, dim2, dim3, dim4); - Arr4 Bpar(dim1, dim2, dim3, dim4); - Arr4 Cpar(dim1, dim2, dim3, dim4); + array_ref4 Apar(dim1, dim2, dim3, dim4); + array_ref4 Bpar(dim1, dim2, dim3, dim4); + array_ref4 Cpar(dim1, dim2, dim3, dim4); printf("Initializing data ...\n"); for(size_t i=0; i (diagonal) ===\n"); + //printf("=== testing array_ref2 (diagonal) ===\n"); //testArr2_diagonal(); - //printf("=== testing Arr2 (diagonal) 
===\n"); + //printf("=== testing array_ref2 (diagonal) ===\n"); //testArr2_diagonal(); - printf("=== testing Arr2 ===\n"); + printf("=== testing array_ref2 ===\n"); testArr2(); - printf("=== testing Arr2 ===\n"); + printf("=== testing array_ref2 ===\n"); testArr2(); - printf("=== testing Arr3 ===\n"); + printf("=== testing array_ref3 ===\n"); testArr3(); - printf("=== testing Arr3 ===\n"); + printf("=== testing array_ref3 ===\n"); testArr3(); - printf("=== testing Arr4 ===\n"); + printf("=== testing array_ref4 ===\n"); testArr4(); - printf("=== testing Arr4 ===\n"); + printf("=== testing array_ref4 ===\n"); testArr4(); } From f2fe840be04b02cfcbb0d53adffc9dffdc1954ee Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 9 Aug 2013 17:35:24 +0200 Subject: [PATCH 028/118] eliminated unnecessary reference for arrays --- communication/ComNodes3D.cpp | 26 +++++----- fields/EMfields3D.cpp | 54 ++++++++++---------- grids/Grid3DCU.cpp | 34 ++++++------- include/Basic.h | 66 ++++++++++++------------ include/ComNodes3D.h | 26 +++++----- include/EMfields3D.h | 56 ++++++++++----------- include/Grid3DCU.h | 98 ++++++++++++++++++------------------ include/Moments.h | 20 ++++---- include/TransArraySpace3D.h | 8 +-- include/phdf5.h | 4 +- inputoutput/phdf5.cpp | 4 +- 11 files changed, 198 insertions(+), 198 deletions(-) diff --git a/communication/ComNodes3D.cpp b/communication/ComNodes3D.cpp index 5d6f4424..73906404 100644 --- a/communication/ComNodes3D.cpp +++ b/communication/ComNodes3D.cpp @@ -5,7 +5,7 @@ #include "Alloc.h" /** communicate ghost cells (FOR NODES) */ -void communicateNode(int nx, int ny, int nz, arr3_double& _vector, VirtualTopology3D * vct) { +void communicateNode(int nx, int ny, int nz, arr3_double _vector, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector=_vector.fetch_arr3(); @@ -109,7 +109,7 @@ void communicateNode(int nx, int ny, int nz, arr3_double& _vector, VirtualTopolo timeTasks.addto_communicate(); } /** communicate ghost cells 
(FOR NODES) */ -void communicateNodeBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateNodeBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector = _vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -215,7 +215,7 @@ void communicateNodeBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceX timeTasks.addto_communicate(); } /** communicate ghost cells (FOR NODES) with particles BC*/ -void communicateNodeBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateNodeBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -322,7 +322,7 @@ void communicateNodeBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFac } /** SPECIES: communicate ghost cells */ -void communicateNode(int nx, int ny, int nz, arr4_double& _vector, int ns, VirtualTopology3D * vct) { +void communicateNode(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ****vector = _vector.fetch_arr4(); @@ -427,7 +427,7 @@ void communicateNode(int nx, int ny, int nz, arr4_double& _vector, int ns, Virtu // PARTICLES /** SPECIES: communicate ghost cells */ -void communicateNode_P(int nx, int ny, int nz, arr4_double& _vector, int ns, VirtualTopology3D * vct) { +void communicateNode_P(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * 
vct) { timeTasks.start_communicate(); double ****vector = _vector.fetch_arr4(); @@ -532,7 +532,7 @@ void communicateNode_P(int nx, int ny, int nz, arr4_double& _vector, int ns, Vir // /** communicate ghost cells (FOR CENTERS) */ -void communicateCenter(int nx, int ny, int nz, arr3_double& _vector, VirtualTopology3D * vct) { +void communicateCenter(int nx, int ny, int nz, arr3_double _vector, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector = _vector.fetch_arr3(); @@ -634,7 +634,7 @@ void communicateCenter(int nx, int ny, int nz, arr3_double& _vector, VirtualTopo timeTasks.addto_communicate(); } /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ -void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -667,7 +667,7 @@ void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double& _vector, } // particles /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ -void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -702,7 +702,7 @@ void communicateCenterBoxStencilBC_P(int nx, int ny, 
int nz, arr3_double& _vecto // -void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -734,7 +734,7 @@ void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double& _vector, i timeTasks.addto_communicate(); } -void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -769,7 +769,7 @@ void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double& _vector, /** SPECIES: communicate ghost cells */ -void communicateCenter(int nx, int ny, int nz, arr4_double& _vector, int ns, VirtualTopology3D * vct) { +void communicateCenter(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ****vector=_vector.fetch_arr4(); @@ -870,7 +870,7 @@ void communicateCenter(int nx, int ny, int nz, arr4_double& _vector, int ns, Vir timeTasks.addto_communicate(); } // /////////// communication + BC //////////////////////////// -void communicateCenterBC(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int 
bcFaceZleft, VirtualTopology3D * vct) { +void communicateCenterBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector=_vector.fetch_arr3(); @@ -974,7 +974,7 @@ void communicateCenterBC(int nx, int ny, int nz, arr3_double& _vector, int bcFac timeTasks.addto_communicate(); } // /////////// communication + BC //////////////////////////// -void communicateCenterBC_P(int nx, int ny, int nz, arr3_double& _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { +void communicateCenterBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks.start_communicate(); double ***vector=_vector.fetch_arr3(); diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index c01b149c..93b18100 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -699,7 +699,7 @@ void EMfields3D::MaxwellImage(double *im, double *vector, Grid * grid, VirtualTo } /*! Calculate PI dot (vectX, vectY, vectZ) */ -void EMfields3D::PIdot(arr3_double& PIdotX, arr3_double& PIdotY, arr3_double& PIdotZ, const_arr3_double& vectX, const_arr3_double& vectY, const_arr3_double& vectZ, int ns, Grid * grid) { +void EMfields3D::PIdot(arr3_double PIdotX, arr3_double PIdotY, arr3_double PIdotZ, const_arr3_double vectX, const_arr3_double vectY, const_arr3_double vectZ, int ns, Grid * grid) { double beta, edotb, omcx, omcy, omcz, denom; beta = .5 * qom[ns] * dt / c; for (int i = 1; i < nxn - 1; i++) @@ -716,8 +716,8 @@ void EMfields3D::PIdot(arr3_double& PIdotX, arr3_double& PIdotY, arr3_double& PI } } /*! 
Calculate MU dot (vectX, vectY, vectZ) */ -void EMfields3D::MUdot(arr3_double& MUdotX, arr3_double& MUdotY, arr3_double& MUdotZ, - const_arr3_double& vectX, const_arr3_double& vectY, const_arr3_double& vectZ, Grid * grid) +void EMfields3D::MUdot(arr3_double MUdotX, arr3_double MUdotY, arr3_double MUdotZ, + const_arr3_double vectX, const_arr3_double vectY, const_arr3_double vectZ, Grid * grid) { double beta, edotb, omcx, omcy, omcz, denom; for (int i = 1; i < nxn - 1; i++) @@ -744,7 +744,7 @@ void EMfields3D::MUdot(arr3_double& MUdotX, arr3_double& MUdotY, arr3_double& MU } } /* Interpolation smoothing: Smoothing (vector must already have ghost cells) TO MAKE SMOOTH value as to be different from 1.0 type = 0 --> center based vector ; type = 1 --> node based vector ; */ -void EMfields3D::smooth(double value, arr3_double& vector, int type, Grid * grid, VirtualTopology3D * vct) { +void EMfields3D::smooth(double value, arr3_double vector, int type, Grid * grid, VirtualTopology3D * vct) { int nvolte = 6; for (int icount = 1; icount < nvolte + 1; icount++) { @@ -841,7 +841,7 @@ void EMfields3D::smoothE(double value, VirtualTopology3D * vct, Collective *col) } /* SPECIES: Interpolation smoothing TO MAKE SMOOTH value as to be different from 1.0 type = 0 --> center based vector type = 1 --> node based vector */ -void EMfields3D::smooth(double value, arr4_double& vector, int is, int type, Grid * grid, VirtualTopology3D * vct) { +void EMfields3D::smooth(double value, arr4_double vector, int is, int type, Grid * grid, VirtualTopology3D * vct) { cout << "Smoothing for Species not implemented in 3D" << endl; } @@ -2488,8 +2488,8 @@ void EMfields3D::sustensorRightZ(double **susxz, double **susyz, double **suszz) } /*! 
Perfect conductor boundary conditions: LEFT wall */ -void EMfields3D::perfectConductorLeft(arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, - const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, +void EMfields3D::perfectConductorLeft(arr3_double imageX, arr3_double imageY, arr3_double imageZ, + const_arr3_double vectorX, const_arr3_double vectorY, const_arr3_double vectorZ, int dir, Grid * grid) { double** susxy; @@ -2552,10 +2552,10 @@ void EMfields3D::perfectConductorLeft(arr3_double& imageX, arr3_double& imageY, /*! Perfect conductor boundary conditions: RIGHT wall */ void EMfields3D::perfectConductorRight( - arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, - const_arr3_double& vectorX, - const_arr3_double& vectorY, - const_arr3_double& vectorZ, + arr3_double imageX, arr3_double imageY, arr3_double imageZ, + const_arr3_double vectorX, + const_arr3_double vectorY, + const_arr3_double vectorZ, int dir, Grid * grid) { double beta, omcx, omcy, omcz, denom; @@ -2618,7 +2618,7 @@ void EMfields3D::perfectConductorRight( } /*! Perfect conductor boundary conditions for source: LEFT WALL */ -void EMfields3D::perfectConductorLeftS(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, int dir) { +void EMfields3D::perfectConductorLeftS(arr3_double vectorX, arr3_double vectorY, arr3_double vectorZ, int dir) { double ebc[3]; @@ -2664,7 +2664,7 @@ void EMfields3D::perfectConductorLeftS(arr3_double& vectorX, arr3_double& vector } /*! 
Perfect conductor boundary conditions for source: RIGHT WALL */ -void EMfields3D::perfectConductorRightS(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, int dir) { +void EMfields3D::perfectConductorRightS(arr3_double vectorX, arr3_double vectorY, arr3_double vectorZ, int dir) { double ebc[3]; @@ -2829,8 +2829,8 @@ void EMfields3D::updateInfoFields(Grid *grid,VirtualTopology3D *vct,Collective * } -void EMfields3D::BoundaryConditionsEImage(arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, - const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, +void EMfields3D::BoundaryConditionsEImage(arr3_double imageX, arr3_double imageY, arr3_double imageZ, + const_arr3_double vectorX, const_arr3_double vectorY, const_arr3_double vectorZ, int nx, int ny, int nz, VirtualTopology3D *vct,Grid *grid) { @@ -2892,7 +2892,7 @@ void EMfields3D::BoundaryConditionsEImage(arr3_double& imageX, arr3_double& imag } -void EMfields3D::BoundaryConditionsB(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct){ +void EMfields3D::BoundaryConditionsB(arr3_double vectorX, arr3_double vectorY, arr3_double vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct){ if(vct->getXleft_neighbor()==MPI_PROC_NULL && bcEMfaceXleft ==2) { for (int j=0; j < ny;j++) @@ -2975,7 +2975,7 @@ void EMfields3D::BoundaryConditionsB(arr3_double& vectorX, arr3_double& vectorY, } -void EMfields3D::BoundaryConditionsE(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct){ +void EMfields3D::BoundaryConditionsE(arr3_double vectorX, arr3_double vectorY, arr3_double vectorZ,int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct){ if(vct->getXleft_neighbor()==MPI_PROC_NULL && bcEMfaceXleft ==2) { for (int j=0; j < ny;j++) @@ -3058,7 +3058,7 @@ void EMfields3D::BoundaryConditionsE(arr3_double& vectorX, arr3_double& 
vectorY, } /*! get Electric Field component X array cell without the ghost cells */ -void EMfields3D::getExc(arr3_double& arr, Grid3DCU *grid) { +void EMfields3D::getExc(arr3_double arr, Grid3DCU *grid) { array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ex); @@ -3069,7 +3069,7 @@ void EMfields3D::getExc(arr3_double& arr, Grid3DCU *grid) { arr[i-1][j-1][k-1]=tmp[i][j][k]; } /*! get Electric Field component Y array cell without the ghost cells */ -void EMfields3D::getEyc(arr3_double& arr, Grid3DCU *grid) { +void EMfields3D::getEyc(arr3_double arr, Grid3DCU *grid) { array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ey); @@ -3080,7 +3080,7 @@ void EMfields3D::getEyc(arr3_double& arr, Grid3DCU *grid) { arr[i-1][j-1][k-1]=tmp[i][j][k]; } /*! get Electric Field component Z array cell without the ghost cells */ -void EMfields3D::getEzc(arr3_double& arr, Grid3DCU *grid) { +void EMfields3D::getEzc(arr3_double arr, Grid3DCU *grid) { array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ez); @@ -3091,28 +3091,28 @@ void EMfields3D::getEzc(arr3_double& arr, Grid3DCU *grid) { arr[i-1][j-1][k-1]=tmp[i][j][k]; } /*! get Magnetic Field component X array cell without the ghost cells */ -void EMfields3D::getBxc(arr3_double& arr) { +void EMfields3D::getBxc(arr3_double arr) { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=Bxc[i][j][k]; } /*! get Magnetic Field component Y array cell without the ghost cells */ -void EMfields3D::getByc(arr3_double& arr) { +void EMfields3D::getByc(arr3_double arr) { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=Byc[i][j][k]; } /*! 
get Magnetic Field component Z array cell without the ghost cells */ -void EMfields3D::getBzc(arr3_double& arr) { +void EMfields3D::getBzc(arr3_double arr) { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) arr[i-1][j-1][k-1]=Bzc[i][j][k]; } /*! get species density component X array cell without the ghost cells */ -void EMfields3D::getRHOcs(arr3_double& arr, Grid3DCU *grid, int is) { +void EMfields3D::getRHOcs(arr3_double arr, Grid3DCU *grid, int is) { array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, rhons); @@ -3124,7 +3124,7 @@ void EMfields3D::getRHOcs(arr3_double& arr, Grid3DCU *grid, int is) { } /*! get Magnetic Field component X array species is cell without the ghost cells */ -void EMfields3D::getJxsc(arr3_double& arr, Grid3DCU *grid, int is) { +void EMfields3D::getJxsc(arr3_double arr, Grid3DCU *grid, int is) { array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jxs); @@ -3136,7 +3136,7 @@ void EMfields3D::getJxsc(arr3_double& arr, Grid3DCU *grid, int is) { } /*! get current component Y array species is cell without the ghost cells */ -void EMfields3D::getJysc(arr3_double& arr, Grid3DCU *grid, int is) { +void EMfields3D::getJysc(arr3_double arr, Grid3DCU *grid, int is) { array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jys); @@ -3147,7 +3147,7 @@ void EMfields3D::getJysc(arr3_double& arr, Grid3DCU *grid, int is) { arr[i-1][j-1][k-1]=tmp[is][i][j][k]; } /*! 
get current component Z array species is cell without the ghost cells */ -void EMfields3D::getJzsc(arr3_double& arr, Grid3DCU *grid, int is) { +void EMfields3D::getJzsc(arr3_double arr, Grid3DCU *grid, int is) { array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jzs); diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index a533f481..755f3096 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -88,7 +88,7 @@ void Grid3DCU::print(VirtualTopology3D * ptVCT) { } /** calculate gradient on nodes, given a scalar field defined on central points */ -void Grid3DCU::gradC2N(arr3_double& gradXN, arr3_double& gradYN, arr3_double& gradZN, const_arr3_double& scFieldC) { +void Grid3DCU::gradC2N(arr3_double gradXN, arr3_double gradYN, arr3_double gradZN, const_arr3_double scFieldC) { for (register int i = 1; i < nxn - 1; i++) for (register int j = 1; j < nyn - 1; j++) for (register int k = 1; k < nzn - 1; k++) { @@ -99,7 +99,7 @@ void Grid3DCU::gradC2N(arr3_double& gradXN, arr3_double& gradYN, arr3_double& gr } /** calculate gradient on nodes, given a scalar field defined on central points */ -void Grid3DCU::gradN2C(arr3_double& gradXC, arr3_double& gradYC, arr3_double& gradZC, const_arr3_double& scFieldN) { +void Grid3DCU::gradN2C(arr3_double gradXC, arr3_double gradYC, arr3_double gradZC, const_arr3_double scFieldN) { for (register int i = 1; i < nxc - 1; i++) for (register int j = 1; j < nyc - 1; j++) for (register int k = 1; k < nzc - 1; k++) { @@ -110,7 +110,7 @@ void Grid3DCU::gradN2C(arr3_double& gradXC, arr3_double& gradYC, arr3_double& gr } /** calculate divergence on central points, given a vector field defined on nodes */ -void Grid3DCU::divN2C(arr3_double& divC, const_arr3_double& vecFieldXN, const_arr3_double& vecFieldYN, const_arr3_double& vecFieldZN) { +void Grid3DCU::divN2C(arr3_double divC, const_arr3_double vecFieldXN, const_arr3_double vecFieldYN, const_arr3_double vecFieldZN) { double compX; double compY; double compZ; @@ -125,7 +125,7 @@ 
void Grid3DCU::divN2C(arr3_double& divC, const_arr3_double& vecFieldXN, const_ar } /** calculate divergence on central points, given a Tensor field defined on nodes */ -void Grid3DCU::divSymmTensorN2C(arr3_double& divCX, arr3_double& divCY, arr3_double& divCZ, const_arr4_double& pXX, const_arr4_double& pXY, const_arr4_double& pXZ, const_arr4_double& pYY, const_arr4_double& pYZ, const_arr4_double& pZZ, int ns) { +void Grid3DCU::divSymmTensorN2C(arr3_double divCX, arr3_double divCY, arr3_double divCZ, const_arr4_double pXX, const_arr4_double pXY, const_arr4_double pXZ, const_arr4_double pYY, const_arr4_double pYZ, const_arr4_double pZZ, int ns) { double comp1X, comp2X, comp3X; double comp1Y, comp2Y, comp3Y; double comp1Z, comp2Z, comp3Z; @@ -148,7 +148,7 @@ void Grid3DCU::divSymmTensorN2C(arr3_double& divCX, arr3_double& divCY, arr3_dou } /** calculate divergence on nodes, given a vector field defined on central points */ -void Grid3DCU::divC2N(arr3_double& divN, const_arr3_double& vecFieldXC, const_arr3_double& vecFieldYC, const_arr3_double& vecFieldZC) { +void Grid3DCU::divC2N(arr3_double divN, const_arr3_double vecFieldXC, const_arr3_double vecFieldYC, const_arr3_double vecFieldZC) { double compX; double compY; double compZ; @@ -163,7 +163,7 @@ void Grid3DCU::divC2N(arr3_double& divN, const_arr3_double& vecFieldXC, const_ar } /** calculate curl on nodes, given a vector field defined on central points */ -void Grid3DCU::curlC2N(arr3_double& curlXN, arr3_double& curlYN, arr3_double& curlZN, const_arr3_double& vecFieldXC, const_arr3_double& vecFieldYC, const_arr3_double& vecFieldZC) { +void Grid3DCU::curlC2N(arr3_double curlXN, arr3_double curlYN, arr3_double curlZN, const_arr3_double vecFieldXC, const_arr3_double vecFieldYC, const_arr3_double vecFieldZC) { double compZDY, compYDZ; double compXDZ, compZDX; double compYDX, compXDY; @@ -187,8 +187,8 @@ void Grid3DCU::curlC2N(arr3_double& curlXN, arr3_double& curlYN, arr3_double& cu } /** calculate curl on central 
points, given a vector field defined on nodes */ -void Grid3DCU::curlN2C(arr3_double& curlXC, arr3_double& curlYC, arr3_double& curlZC, - const_arr3_double& vecFieldXN, const_arr3_double& vecFieldYN, const_arr3_double& vecFieldZN) +void Grid3DCU::curlN2C(arr3_double curlXC, arr3_double curlYC, arr3_double curlZC, + const_arr3_double vecFieldXN, const_arr3_double vecFieldYN, const_arr3_double vecFieldZN) { double compZDY, compYDZ; double compXDZ, compZDX; @@ -217,7 +217,7 @@ void Grid3DCU::curlN2C(arr3_double& curlXC, arr3_double& curlYC, arr3_double& cu } /** calculate laplacian on nodes, given a scalar field defined on nodes */ -void Grid3DCU::lapN2N(arr3_double& lapN, const_arr3_double& scFieldN, VirtualTopology3D * vct) { +void Grid3DCU::lapN2N(arr3_double lapN, const_arr3_double scFieldN, VirtualTopology3D * vct) { // calculate laplacian as divercence of gradient // allocate 3 gradients: defined on central points array3_double gradXC(nxc, nyc, nzc); @@ -233,7 +233,7 @@ void Grid3DCU::lapN2N(arr3_double& lapN, const_arr3_double& scFieldN, VirtualTop } /** calculate laplacian on central points, given a scalar field defined on central points */ -void Grid3DCU::lapC2C(arr3_double& lapC, const_arr3_double& scFieldC, VirtualTopology3D * vct) { +void Grid3DCU::lapC2C(arr3_double lapC, const_arr3_double scFieldC, VirtualTopology3D * vct) { // calculate laplacian as divercence of gradient // allocate 3 gradients: defined on nodes array3_double gradXN(nxn, nyn, nzn); @@ -269,7 +269,7 @@ void Grid3DCU::lapC2C(arr3_double& lapC, const_arr3_double& scFieldC, VirtualTop } /** calculate laplacian on central points, given a scalar field defined on central points for Poisson */ -void Grid3DCU::lapC2Cpoisson(arr3_double& lapC, arr3_double& scFieldC, VirtualTopology3D * vct) { +void Grid3DCU::lapC2Cpoisson(arr3_double lapC, arr3_double scFieldC, VirtualTopology3D * vct) { // communicate first the scFieldC communicateCenterBoxStencilBC(nxc, nyc, nzc, scFieldC, 1, 1, 1, 1, 1, 1, 
vct); for (register int i = 1; i < nxc - 1; i++) @@ -279,7 +279,7 @@ void Grid3DCU::lapC2Cpoisson(arr3_double& lapC, arr3_double& scFieldC, VirtualTo } /** calculate divergence on boundaries */ -void Grid3DCU::divBCleft(arr3_double& divBC, const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, int leftActiveNode, int dirDER) { +void Grid3DCU::divBCleft(arr3_double divBC, const_arr3_double vectorX, const_arr3_double vectorY, const_arr3_double vectorZ, int leftActiveNode, int dirDER) { double compX, compY, compZ; switch (dirDER) { case 0: // DIVERGENCE DIRECTION X @@ -316,7 +316,7 @@ void Grid3DCU::divBCleft(arr3_double& divBC, const_arr3_double& vectorX, const_a } /** calculate divergence on boundaries */ -void Grid3DCU::divBCright(arr3_double& divBC, const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, int rightActiveNode, int dirDER) { +void Grid3DCU::divBCright(arr3_double divBC, const_arr3_double vectorX, const_arr3_double vectorY, const_arr3_double vectorZ, int rightActiveNode, int dirDER) { double compX, compY, compZ; @@ -355,7 +355,7 @@ void Grid3DCU::divBCright(arr3_double& divBC, const_arr3_double& vectorX, const_ } /** calculate derivative on left boundary */ -void Grid3DCU::derBC(arr3_double& derBC, const_arr3_double& vector, int leftActiveNode, int dirDER) { +void Grid3DCU::derBC(arr3_double derBC, const_arr3_double vector, int leftActiveNode, int dirDER) { switch (dirDER) { case 0: // DERIVATIVE DIRECTION X for (register int j = 1; j < nyc - 1; j++) @@ -378,7 +378,7 @@ void Grid3DCU::derBC(arr3_double& derBC, const_arr3_double& vector, int leftActi } /** interpolate on nodes from central points: do this for the magnetic field*/ -void Grid3DCU::interpC2N(arr3_double& vecFieldN, const_arr3_double& vecFieldC) { +void Grid3DCU::interpC2N(arr3_double vecFieldN, const_arr3_double vecFieldC) { for (register int i = 1; i < nxn - 1; i++) for (register int j = 1; j < nyn - 1; j++) for (register int k = 
1; k < nzn - 1; k++) @@ -386,7 +386,7 @@ void Grid3DCU::interpC2N(arr3_double& vecFieldN, const_arr3_double& vecFieldC) { } /** interpolate on central points from nodes */ -void Grid3DCU::interpN2C(arr3_double& vecFieldC, const_arr3_double& vecFieldN) { +void Grid3DCU::interpN2C(arr3_double vecFieldC, const_arr3_double vecFieldN) { for (register int i = 1; i < nxc - 1; i++) for (register int j = 1; j < nyc - 1; j++) for (register int k = 1; k < nzc - 1; k++) @@ -394,7 +394,7 @@ void Grid3DCU::interpN2C(arr3_double& vecFieldC, const_arr3_double& vecFieldN) { } /** interpolate on central points from nodes */ -void Grid3DCU::interpN2C(arr4_double& vecFieldC, int ns, const_arr4_double& vecFieldN) { +void Grid3DCU::interpN2C(arr4_double vecFieldC, int ns, const_arr4_double vecFieldN) { for (register int i = 1; i < nxc - 1; i++) for (register int j = 1; j < nyc - 1; j++) for (register int k = 1; k < nzc - 1; k++) diff --git a/include/Basic.h b/include/Basic.h index f444b216..2dd7da6c 100644 --- a/include/Basic.h +++ b/include/Basic.h @@ -64,7 +64,7 @@ inline double norm2(double **vect, int nx, int ny) { return (result); } /** method to calculate the square norm of a vector */ -inline double norm2(const arr3_double& vect, int nx, int ny) { +inline double norm2(const arr3_double vect, int nx, int ny) { double result = 0; for (int i = 0; i < nx; i++) for (int j = 0; j < ny; j++) @@ -82,7 +82,7 @@ inline double norm2(double *vect, int nx) { /** method to calculate the parallel dot product */ -inline double norm2P(const arr3_double& vect, int nx, int ny, int nz) { +inline double norm2P(const arr3_double vect, int nx, int ny, int nz) { double result = 0; double local_result = 0; for (int i = 0; i < nx; i++) @@ -128,7 +128,7 @@ inline void sum(double *vect1, double *vect2, int n) { } /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { +inline void sum(arr3_double 
vect1, const arr3_double vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -136,14 +136,14 @@ inline void sum(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, in } /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { +inline void sum(arr3_double vect1, const arr3_double vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect1.fetch(i,j,0) += vect2.get(i,j,0); } /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(arr3_double& vect1, const arr4_double& vect2, int nx, int ny, int nz, int ns) { +inline void sum(arr3_double vect1, const arr4_double vect2, int nx, int ny, int nz, int ns) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -151,13 +151,13 @@ inline void sum(arr3_double& vect1, const arr4_double& vect2, int nx, int ny, in } /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(arr3_double& vect1, const arr4_double& vect2, int nx, int ny, int ns) { +inline void sum(arr3_double vect1, const arr4_double vect2, int nx, int ny, int ns) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect1.fetch(i,j,0) += vect2.get(ns,i,j,0); } /** method to calculate the subtraction of two vectors vector1 = vector1 - vector2*/ -inline void sub(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { +inline void sub(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -165,7 +165,7 @@ inline void sub(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, in } /** method to calculate the 
subtraction of two vectors vector1 = vector1 - vector2*/ -inline void sub(arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { +inline void sub(arr3_double vect1, const arr3_double vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect1.fetch(i,j,0) -= vect2.get(i,j,0); @@ -173,7 +173,7 @@ inline void sub(arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { /** method to sum 4 vectors vector1 = alfa*vector1 + beta*vector2 + gamma*vector3 + delta*vector4 */ -inline void sum4(arr3_double& vect1, double alfa, const arr3_double& vect2, double beta, const arr3_double& vect3, double gamma, const arr3_double& vect4, double delta, const arr3_double& vect5, int nx, int ny, int nz) { +inline void sum4(arr3_double vect1, double alfa, const arr3_double vect2, double beta, const arr3_double vect3, double gamma, const arr3_double vect4, double delta, const arr3_double vect5, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -187,7 +187,7 @@ inline void scale(double *vect, double alfa, int n) { } /** method to calculate the scalar-vector product */ -inline void scale(arr3_double& vect, double alfa, int nx, int ny) { +inline void scale(arr3_double vect, double alfa, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect.fetch(i,j,0) *= alfa; @@ -195,7 +195,7 @@ inline void scale(arr3_double& vect, double alfa, int nx, int ny) { /** method to calculate the scalar-vector product */ -inline void scale(arr3_double& vect, double alfa, int nx, int ny, int nz) { +inline void scale(arr3_double vect, double alfa, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -209,7 +209,7 @@ inline void scale(double vect[][2][2], double alfa, int nx, int ny, int nz) { vect[i][j][k] *= alfa; } /** method 
to calculate the scalar-vector product */ -inline void scale(arr3_double& vect1, const arr3_double& vect2, double alfa, int nx, int ny, int nz) { +inline void scale(arr3_double vect1, const arr3_double vect2, double alfa, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -217,7 +217,7 @@ inline void scale(arr3_double& vect1, const arr3_double& vect2, double alfa, int } /** method to calculate the scalar-vector product */ -inline void scale(arr3_double& vect1, const arr3_double& vect2, double alfa, int nx, int ny) { +inline void scale(arr3_double vect1, const arr3_double vect2, double alfa, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect1.fetch(i,j,0) = vect2.get(i,j,0) * alfa; @@ -230,7 +230,7 @@ inline void scale(double *vect1, double *vect2, double alfa, int n) { } /** method to calculate vector1 = vector1 + alfa*vector2 */ -inline void addscale(double alfa, arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { +inline void addscale(double alfa, arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -245,7 +245,7 @@ inline void addscale(double alfa, double vect1[][2][2], double vect2[][2][2], in } /** method to calculate vector1 = vector1 + alfa*vector2 */ -inline void addscale(double alfa, arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { +inline void addscale(double alfa, arr3_double vect1, const arr3_double vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect1.fetch(i,j,0) += alfa * vect2.get(i,j,0); @@ -263,7 +263,7 @@ inline void addscale(double alfa, double beta, double *vect1, double *vect2, int } /** method to calculate vector1 = beta*vector1 + alfa*vector2 */ -inline void addscale(double alfa, double 
beta, arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { +inline void addscale(double alfa, double beta, arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) @@ -273,7 +273,7 @@ inline void addscale(double alfa, double beta, arr3_double& vect1, const arr3_do } /** method to calculate vector1 = beta*vector1 + alfa*vector2 */ -inline void addscale(double alfa, double beta, arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { +inline void addscale(double alfa, double beta, arr3_double vect1, const arr3_double vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect1.fetch(i,j,0) = beta * vect1.get(i,j,0) + alfa * vect2.get(i,j,0); @@ -282,21 +282,21 @@ inline void addscale(double alfa, double beta, arr3_double& vect1, const arr3_do /** method to calculate vector1 = alfa*vector2 + beta*vector3 */ -inline void scaleandsum(arr3_double& vect1, double alfa, double beta, const arr3_double& vect2, const arr3_double& vect3, int nx, int ny, int nz) { +inline void scaleandsum(arr3_double vect1, double alfa, double beta, const arr3_double vect2, const arr3_double vect3, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) vect1.fetch(i,j,k) = alfa * vect2.get(i,j,k) + beta * vect3.get(i,j,k); } /** method to calculate vector1 = alfa*vector2 + beta*vector3 with vector2 depending on species*/ -inline void scaleandsum(arr3_double& vect1, double alfa, double beta, const arr4_double& vect2, const arr3_double& vect3, int ns, int nx, int ny, int nz) { +inline void scaleandsum(arr3_double vect1, double alfa, double beta, const arr4_double vect2, const arr3_double vect3, int ns, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) 
vect1.fetch(i,j,k) = alfa * vect2.get(ns,i,j,k) + beta * vect3.get(i,j,k); } /** method to calculate vector1 = alfa*vector2*vector3 with vector2 depending on species*/ -inline void prod(arr3_double& vect1, double alfa, const arr4_double& vect2, int ns, const arr3_double& vect3, int nx, int ny, int nz) { +inline void prod(arr3_double vect1, double alfa, const arr4_double vect2, int ns, const arr3_double vect3, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -304,21 +304,21 @@ inline void prod(arr3_double& vect1, double alfa, const arr4_double& vect2, int } /** method to calculate vect1 = vect2/alfa */ -inline void div(arr3_double& vect1, double alfa, const arr3_double& vect2, int nx, int ny, int nz) { +inline void div(arr3_double vect1, double alfa, const arr3_double vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) vect1.fetch(i,j,k) = vect2.get(i,j,k) / alfa; } -inline void prod6(arr3_double& vect1, const arr3_double& vect2, const arr3_double& vect3, const arr3_double& vect4, const arr3_double& vect5, const arr3_double& vect6, const arr3_double& vect7, int nx, int ny, int nz) { +inline void prod6(arr3_double vect1, const arr3_double vect2, const arr3_double vect3, const arr3_double vect4, const arr3_double vect5, const arr3_double vect6, const arr3_double vect7, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) vect1.fetch(i,j,k) = vect2.get(i,j,k) * vect3.get(i,j,k) + vect4.get(i,j,k) * vect5.get(i,j,k) + vect6.get(i,j,k) * vect7.get(i,j,k); } /** method used for calculating PI */ -inline void proddiv(arr3_double& vect1, const arr3_double& vect2, double alfa, const arr3_double& vect3, const arr3_double& vect4, const arr3_double& vect5, const arr3_double& vect6, double beta, const 
arr3_double& vect7, const arr3_double& vect8, double gamma, const arr3_double& vect9, int nx, int ny, int nz) { +inline void proddiv(arr3_double vect1, const arr3_double vect2, double alfa, const arr3_double vect3, const arr3_double vect4, const arr3_double vect5, const arr3_double vect6, double beta, const arr3_double vect7, const arr3_double vect8, double gamma, const arr3_double vect9, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -328,7 +328,7 @@ inline void proddiv(arr3_double& vect1, const arr3_double& vect2, double alfa, c // ***vect1++ = (***vect2++ + alfa*((***vect3++)*(***vect4++) - (***vect5++)*(***vect6++)) + beta*(***vect7++)*(***vect8++))/(1+gamma*(***vect9++)); } /** method to calculate the opposite of a vector */ -inline void neg(arr3_double& vect, int nx, int ny, int nz) { +inline void neg(arr3_double vect, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -336,13 +336,13 @@ inline void neg(arr3_double& vect, int nx, int ny, int nz) { } /** method to calculate the opposite of a vector */ -inline void neg(arr3_double& vect, int nx, int ny) { +inline void neg(arr3_double vect, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect.fetch(i,j,0) = -vect.get(i,j,0); } /** method to calculate the opposite of a vector */ -inline void neg(arr3_double& vect, int nx) { +inline void neg(arr3_double vect, int nx) { for (register int i = 0; i < nx; i++) vect.fetch(i,0,0) = -vect.get(i,0,0); } @@ -354,7 +354,7 @@ inline void neg(double *vect, int n) { } /** method to set equal two vectors */ -inline void eq(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int nz) { +inline void eq(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; 
j++) for (register int k = 0; k < nz; k++) @@ -362,7 +362,7 @@ inline void eq(arr3_double& vect1, const arr3_double& vect2, int nx, int ny, int } /** method to set equal two vectors */ -inline void eq(arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { +inline void eq(arr3_double vect1, const arr3_double vect2, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect1.fetch(i,j,0) = vect2.get(i,j,0); @@ -370,14 +370,14 @@ inline void eq(arr3_double& vect1, const arr3_double& vect2, int nx, int ny) { } /** method to set equal two vectors */ -inline void eq(arr4_double& vect1, const arr3_double& vect2, int nx, int ny, int is) { +inline void eq(arr4_double vect1, const arr3_double vect2, int nx, int ny, int is) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) vect1.fetch(is,i,j,0) = vect2.get(i,j,0); } /** method to set equal two vectors */ -inline void eq(arr4_double& vect1, const arr3_double& vect2, int nx, int ny, int nz, int is) { +inline void eq(arr4_double vect1, const arr3_double vect2, int nx, int ny, int nz, int is) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -390,7 +390,7 @@ inline void eq(double *vect1, double *vect2, int n) { vect1[i] = vect2[i]; } /** method to set a vector to a Value */ -inline void eqValue(double value, arr3_double& vect, int nx, int ny, int nz) { +inline void eqValue(double value, arr3_double vect, int nx, int ny, int nz) { for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) @@ -405,14 +405,14 @@ inline void eqValue(double value, double vect[][2][2], int nx, int ny, int nz) { } /** method to set a vector to a Value */ -inline void eqValue(double value, arr3_double& vect, int nx, int ny) { +inline void eqValue(double value, arr3_double vect, int nx, int ny) { for (register int i = 0; i < nx; i++) for (register int j = 0; 
j < ny; j++) vect.fetch(i,j,0) = value; } /** method to set a vector to a Value */ -inline void eqValue(double value, arr3_double& vect, int nx) { +inline void eqValue(double value, arr3_double vect, int nx) { for (register int i = 0; i < nx; i++) vect.fetch(i,0,0) = value; diff --git a/include/ComNodes3D.h b/include/ComNodes3D.h index 7360a02e..c7e86731 100644 --- a/include/ComNodes3D.h +++ b/include/ComNodes3D.h @@ -20,45 +20,45 @@ developers : Stefano Markidis, Giovanni Lapenta #include "BcFields3D.h" /** communicate ghost cells (FOR NODES) */ -void communicateNode(int nx, int ny, int nz, arr3_double& vector, VirtualTopology3D * vct); +void communicateNode(int nx, int ny, int nz, arr3_double vector, VirtualTopology3D * vct); /** communicate ghost cells (FOR NODES) */ -void communicateNodeBC(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateNodeBC(int nx, int ny, int nz, arr3_double vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); /** communicate ghost cells (FOR NODES) with particles BC*/ -void communicateNodeBC_P(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateNodeBC_P(int nx, int ny, int nz, arr3_double vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); /** SPECIES: communicate ghost cells */ -void communicateNode(int nx, int ny, int nz, arr4_double& vector, int ns, VirtualTopology3D * vct); +void communicateNode(int nx, int ny, int nz, arr4_double vector, int ns, VirtualTopology3D * vct); // PARTICLES /** SPECIES: communicate ghost cells */ -void communicateNode_P(int nx, int ny, int nz, arr4_double& 
vector, int ns, VirtualTopology3D * vct); +void communicateNode_P(int nx, int ny, int nz, arr4_double vector, int ns, VirtualTopology3D * vct); // /** communicate ghost cells (FOR CENTERS) */ -void communicateCenter(int nx, int ny, int nz, arr3_double& vector, VirtualTopology3D * vct); +void communicateCenter(int nx, int ny, int nz, arr3_double vector, VirtualTopology3D * vct); /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ -void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); // particles /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ -void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); // -void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); -void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); 
+void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); /** SPECIES: communicate ghost cells */ -void communicateCenter(int nx, int ny, int nz, arr4_double& vector, int ns, VirtualTopology3D * vct); +void communicateCenter(int nx, int ny, int nz, arr4_double vector, int ns, VirtualTopology3D * vct); // /////////// communication + BC //////////////////////////// -void communicateCenterBC(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateCenterBC(int nx, int ny, int nz, arr3_double vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); // /////////// communication + BC //////////////////////////// -void communicateCenterBC_P(int nx, int ny, int nz, arr3_double& vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); +void communicateCenterBC_P(int nx, int ny, int nz, arr3_double vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct); #endif diff --git a/include/EMfields3D.h b/include/EMfields3D.h index 9a086d1f..335a45ea 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -91,11 +91,11 @@ class EMfields3D // :public Field void fixBforcefree(Grid * grid, VirtualTopology3D * vct); /*! 
Calculate the three components of Pi(implicit pressure) cross image vector */ - void PIdot(arr3_double& PIdotX, arr3_double& PIdotY, arr3_double& PIdotZ, - const_arr3_double& vectX, const_arr3_double& vectY, const_arr3_double& vectZ, int ns, Grid * grid); + void PIdot(arr3_double PIdotX, arr3_double PIdotY, arr3_double PIdotZ, + const_arr3_double vectX, const_arr3_double vectY, const_arr3_double vectZ, int ns, Grid * grid); /*! Calculate the three components of mu (implicit permeattivity) cross image vector */ - void MUdot(arr3_double& MUdotX, arr3_double& MUdotY, arr3_double& MUdotZ, - const_arr3_double& vectX, const_arr3_double& vectY, const_arr3_double& vectZ, Grid * grid); + void MUdot(arr3_double MUdotX, arr3_double MUdotY, arr3_double MUdotZ, + const_arr3_double vectX, const_arr3_double vectY, const_arr3_double vectZ, Grid * grid); /*! Calculate rho hat, Jx hat, Jy hat, Jz hat */ void calculateHatFunctions(Grid * grid, VirtualTopology3D * vct); @@ -109,9 +109,9 @@ class EMfields3D // :public Field /*! Sum current over different species */ void sumOverSpeciesJ(); /*! Smoothing after the interpolation* */ - void smooth(double value, arr3_double& vector, int type, Grid * grid, VirtualTopology3D * vct); + void smooth(double value, arr3_double vector, int type, Grid * grid, VirtualTopology3D * vct); /*! SPECIES: Smoothing after the interpolation for species fields* */ - void smooth(double value, arr4_double& vector, int is, int type, Grid * grid, VirtualTopology3D * vct); + void smooth(double value, arr4_double vector, int is, int type, Grid * grid, VirtualTopology3D * vct); /*! smooth the electric field */ void smoothE(double value, VirtualTopology3D * vct, Collective *col); @@ -147,20 +147,20 @@ class EMfields3D // :public Field /*! 
Perfect conductor boundary conditions LEFT wall */ - void perfectConductorLeft(arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, - const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, + void perfectConductorLeft(arr3_double imageX, arr3_double imageY, arr3_double imageZ, + const_arr3_double vectorX, const_arr3_double vectorY, const_arr3_double vectorZ, int dir, Grid * grid); /*! Perfect conductor boundary conditions RIGHT wall */ void perfectConductorRight( - arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, - const_arr3_double& vectorX, - const_arr3_double& vectorY, - const_arr3_double& vectorZ, + arr3_double imageX, arr3_double imageY, arr3_double imageZ, + const_arr3_double vectorX, + const_arr3_double vectorY, + const_arr3_double vectorZ, int dir, Grid * grid); /*! Perfect conductor boundary conditions for source LEFT wall */ - void perfectConductorLeftS(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, int dir); + void perfectConductorLeftS(arr3_double vectorX, arr3_double vectorY, arr3_double vectorZ, int dir); /*! Perfect conductor boundary conditions for source RIGHT wall */ - void perfectConductorRightS(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, int dir); + void perfectConductorRightS(arr3_double vectorX, arr3_double vectorY, arr3_double vectorZ, int dir); /*! 
Calculate the sysceptibility tensor on the boundary */ void sustensorRightX(double **susxx, double **susyx, double **suszx); @@ -193,12 +193,12 @@ class EMfields3D // :public Field // field components without ghost cells // - void getExc(arr3_double& arr, Grid3DCU *grid); - void getEyc(arr3_double& arr, Grid3DCU *grid); - void getEzc(arr3_double& arr, Grid3DCU *grid); - void getBxc(arr3_double& arr); - void getByc(arr3_double& arr); - void getBzc(arr3_double& arr); + void getExc(arr3_double arr, Grid3DCU *grid); + void getEyc(arr3_double arr, Grid3DCU *grid); + void getEzc(arr3_double arr, Grid3DCU *grid); + void getBxc(arr3_double arr); + void getByc(arr3_double arr); + void getBzc(arr3_double arr); arr3_double getRHOc() { return rhoc; } arr3_double getRHOn() { return rhon; } @@ -211,7 +211,7 @@ class EMfields3D // :public Field double getRHOns(int X,int Y,int Z,int is)const{return rhons.get(is,X,Y,Z);} arr4_double getRHOns(){return rhons;} /* density on cells without ghost cells */ - void getRHOcs(arr3_double& arr, Grid3DCU *grid, int is); + void getRHOcs(arr3_double arr, Grid3DCU *grid, int is); double getBx_ext(int X, int Y, int Z) const{return Bx_ext.get(X,Y,Z);} double getBy_ext(int X, int Y, int Z) const{return By_ext.get(X,Y,Z);} @@ -246,9 +246,9 @@ class EMfields3D // :public Field // get current for species in all cells except ghost // - void getJxsc(arr3_double& arr, Grid3DCU *grid, int is); - void getJysc(arr3_double& arr, Grid3DCU *grid, int is); - void getJzsc(arr3_double& arr, Grid3DCU *grid, int is); + void getJxsc(arr3_double arr, Grid3DCU *grid, int is); + void getJysc(arr3_double arr, Grid3DCU *grid, int is); + void getJzsc(arr3_double arr, Grid3DCU *grid, int is); /*! 
get the electric field energy */ double getEenergy(); @@ -497,12 +497,12 @@ class EMfields3D // :public Field injInfoFields* get_InfoFieldsRear(); injInfoFields* get_InfoFieldsRight(); - void BoundaryConditionsB(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, + void BoundaryConditionsB(arr3_double vectorX, arr3_double vectorY, arr3_double vectorZ, int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct); - void BoundaryConditionsE(arr3_double& vectorX, arr3_double& vectorY, arr3_double& vectorZ, + void BoundaryConditionsE(arr3_double vectorX, arr3_double vectorY, arr3_double vectorZ, int nx, int ny, int nz,Grid *grid, VirtualTopology3D *vct); - void BoundaryConditionsEImage(arr3_double& imageX, arr3_double& imageY, arr3_double& imageZ, - const_arr3_double& vectorX, const_arr3_double& vectorY, const_arr3_double& vectorZ, + void BoundaryConditionsEImage(arr3_double imageX, arr3_double imageY, arr3_double imageZ, + const_arr3_double vectorX, const_arr3_double vectorY, const_arr3_double vectorZ, int nx, int ny, int nz, VirtualTopology3D *vct,Grid *grid); }; diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index c8cd883d..2754ec95 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -42,76 +42,76 @@ class Grid3DCU // :public Grid /** print grid info */ void print(VirtualTopology3D * ptVCT); /** calculate a derivative along a direction on nodes */ - void derivN(arr3_double& derN, - const_arr4_double& scFieldC, int ns, int dir); + void derivN(arr3_double derN, + const_arr4_double scFieldC, int ns, int dir); /** calculate gradient on nodes, given a scalar field defined on central points */ - void gradC2N(arr3_double& gradXN, arr3_double& gradYN, arr3_double& gradZN, - const_arr3_double& scFieldC); + void gradC2N(arr3_double gradXN, arr3_double gradYN, arr3_double gradZN, + const_arr3_double scFieldC); /** calculate gradient on nodes, given a scalar field defined on central points */ - void gradN2C(arr3_double& gradXC, arr3_double& gradYC, 
arr3_double& gradZC, - const_arr3_double& scFieldN); + void gradN2C(arr3_double gradXC, arr3_double gradYC, arr3_double gradZC, + const_arr3_double scFieldN); /** calculate divergence on central points, given a vector field defined on nodes */ - void divN2C(arr3_double& divC, - const_arr3_double& vecFieldXN, - const_arr3_double& vecFieldYN, - const_arr3_double& vecFieldZN); + void divN2C(arr3_double divC, + const_arr3_double vecFieldXN, + const_arr3_double vecFieldYN, + const_arr3_double vecFieldZN); /** calculate divergence on nodes, given a vector field defined on central points */ - void divC2N(arr3_double& divN, - const_arr3_double& vecFieldXC, - const_arr3_double& vecFieldYC, - const_arr3_double& vecFieldZC); + void divC2N(arr3_double divN, + const_arr3_double vecFieldXC, + const_arr3_double vecFieldYC, + const_arr3_double vecFieldZC); /** calculate curl on nodes, given a vector field defined on central points */ - void curlC2N(arr3_double& curlXN, arr3_double& curlYN, - arr3_double& curlZN, - const_arr3_double& vecFieldXC, - const_arr3_double& vecFieldYC, - const_arr3_double& vecFieldZC); + void curlC2N(arr3_double curlXN, arr3_double curlYN, + arr3_double curlZN, + const_arr3_double vecFieldXC, + const_arr3_double vecFieldYC, + const_arr3_double vecFieldZC); /** calculate curl on central points, given a vector field defined on nodes */ - void curlN2C(arr3_double& curlXC, arr3_double& curlYC, arr3_double& curlZC, - const_arr3_double& vecFieldXN, - const_arr3_double& vecFieldYN, - const_arr3_double& vecFieldZN); + void curlN2C(arr3_double curlXC, arr3_double curlYC, arr3_double curlZC, + const_arr3_double vecFieldXN, + const_arr3_double vecFieldYN, + const_arr3_double vecFieldZN); /** calculate divergence on central points, given a Tensor field defined on nodes */ - void divSymmTensorN2C(arr3_double& divCX, arr3_double& divCY, arr3_double& divCZ, - const_arr4_double& pXX, - const_arr4_double& pXY, - const_arr4_double& pXZ, - const_arr4_double& pYY, - 
const_arr4_double& pYZ, - const_arr4_double& pZZ, int ns); + void divSymmTensorN2C(arr3_double divCX, arr3_double divCY, arr3_double divCZ, + const_arr4_double pXX, + const_arr4_double pXY, + const_arr4_double pXZ, + const_arr4_double pYY, + const_arr4_double pYZ, + const_arr4_double pZZ, int ns); /** calculate laplacian on nodes, given a scalar field defined on nodes */ - void lapN2N(arr3_double& lapN, - const_arr3_double& scFieldN, VirtualTopology3D * vct); + void lapN2N(arr3_double lapN, + const_arr3_double scFieldN, VirtualTopology3D * vct); /** calculate laplacian on central points, given a scalar field defined on central points for Poisson */ - void lapC2Cpoisson(arr3_double& lapC, - arr3_double& scFieldC, VirtualTopology3D * vct); + void lapC2Cpoisson(arr3_double lapC, + arr3_double scFieldC, VirtualTopology3D * vct); /** calculate laplacian on central points, given a scalar field defined on central points */ - void lapC2C(arr3_double& lapC, - const_arr3_double& scFieldC, VirtualTopology3D * vct); + void lapC2C(arr3_double lapC, + const_arr3_double scFieldC, VirtualTopology3D * vct); /** calculate divergence on boundaries */ - void divBCleft(arr3_double& divBC, - const_arr3_double& vectorX, - const_arr3_double& vectorY, - const_arr3_double& vectorZ, int leftActiveNode, int dirDER); + void divBCleft(arr3_double divBC, + const_arr3_double vectorX, + const_arr3_double vectorY, + const_arr3_double vectorZ, int leftActiveNode, int dirDER); /** calculate divergence on boundaries */ - void divBCright(arr3_double& divBC, - const_arr3_double& vectorX, - const_arr3_double& vectorY, - const_arr3_double& vectorZ, int rightActiveNode, int dirDER); + void divBCright(arr3_double divBC, + const_arr3_double vectorX, + const_arr3_double vectorY, + const_arr3_double vectorZ, int rightActiveNode, int dirDER); /** calculate derivative on boundaries */ - void derBC(arr3_double& derBC, - const_arr3_double& vector, int leftActiveNode, int dirDER); + void derBC(arr3_double derBC, + 
const_arr3_double vector, int leftActiveNode, int dirDER); /** interpolate on nodes from central points */ - void interpC2N(arr3_double& vecFieldN, const_arr3_double& vecFieldC); + void interpC2N(arr3_double vecFieldN, const_arr3_double vecFieldC); /** interpolate on central points from nodes */ - void interpN2C(arr3_double& vecFieldC, const_arr3_double& vecFieldN); + void interpN2C(arr3_double vecFieldC, const_arr3_double vecFieldN); /** interpolate on central points from nodes */ - void interpN2C(arr4_double& vecFieldC, int ns, const_arr4_double& vecFieldN); + void interpN2C(arr4_double vecFieldC, int ns, const_arr4_double vecFieldN); // /////////// PRIVATE VARIABLES ////////////// private: diff --git a/include/Moments.h b/include/Moments.h index 981ee15b..fd28e169 100644 --- a/include/Moments.h +++ b/include/Moments.h @@ -39,16 +39,16 @@ class Moments { double get_pYZ(int i, int j, int k) const { return pYZ.get(i,j,k); } double get_pZZ(int i, int j, int k) const { return pZZ.get(i,j,k); } // fetch accessors (write access) - arr3_double& fetch_rho() { return rho; } - arr3_double& fetch_Jx () { return Jx ; } - arr3_double& fetch_Jy () { return Jy ; } - arr3_double& fetch_Jz () { return Jz ; } - arr3_double& fetch_Pxx() { return pXX; } - arr3_double& fetch_Pxy() { return pXY; } - arr3_double& fetch_Pxz() { return pXZ; } - arr3_double& fetch_Pyy() { return pYY; } - arr3_double& fetch_Pyz() { return pYZ; } - arr3_double& fetch_Pzz() { return pZZ; } + arr3_double fetch_rho() { return rho; } + arr3_double fetch_Jx () { return Jx ; } + arr3_double fetch_Jy () { return Jy ; } + arr3_double fetch_Jz () { return Jz ; } + arr3_double fetch_Pxx() { return pXX; } + arr3_double fetch_Pxy() { return pXY; } + arr3_double fetch_Pxz() { return pXZ; } + arr3_double fetch_Pyy() { return pYY; } + arr3_double fetch_Pyz() { return pYZ; } + arr3_double fetch_Pzz() { return pZZ; } public: Moments(int nxn, int nyn, int nzn) : nx(nxn), diff --git a/include/TransArraySpace3D.h 
b/include/TransArraySpace3D.h index c83710f3..c05a1bdf 100644 --- a/include/TransArraySpace3D.h +++ b/include/TransArraySpace3D.h @@ -11,7 +11,7 @@ developers : Stefano Markidis, Giovanni Lapenta #define TransArraySpace3D_H /** method to convert a 1D field in a 3D field not considering guard cells*/ -inline void solver2phys(arr3_double& vectPhys, double *vectSolver, int nx, int ny, int nz) { +inline void solver2phys(arr3_double vectPhys, double *vectSolver, int nx, int ny, int nz) { for (register int i = 1; i < nx - 1; i++) for (register int j = 1; j < ny - 1; j++) for (register int k = 1; k < nz - 1; k++) @@ -19,7 +19,7 @@ inline void solver2phys(arr3_double& vectPhys, double *vectSolver, int nx, int n } /** method to convert a 1D field in a 3D field not considering guard cells*/ -inline void solver2phys(arr3_double& vectPhys1, arr3_double& vectPhys2, arr3_double& vectPhys3, double *vectSolver, int nx, int ny, int nz) { +inline void solver2phys(arr3_double vectPhys1, arr3_double vectPhys2, arr3_double vectPhys3, double *vectSolver, int nx, int ny, int nz) { for (register int i = 1; i < nx - 1; i++) for (register int j = 1; j < ny - 1; j++) for (register int k = 1; k < nz - 1; k++) { @@ -29,14 +29,14 @@ inline void solver2phys(arr3_double& vectPhys1, arr3_double& vectPhys2, arr3_dou } } /** method to convert a 3D field in a 1D field not considering guard cells*/ -inline void phys2solver(double *vectSolver, const arr3_double& vectPhys, int nx, int ny, int nz) { +inline void phys2solver(double *vectSolver, const arr3_double vectPhys, int nx, int ny, int nz) { for (register int i = 1; i < nx - 1; i++) for (register int j = 1; j < ny - 1; j++) for (register int k = 1; k < nz - 1; k++) *vectSolver++ = vectPhys.get(i,j,k); } /** method to convert a 3D field in a 1D field not considering guard cells*/ -inline void phys2solver(double *vectSolver, const arr3_double& vectPhys1, const arr3_double& vectPhys2, const arr3_double& vectPhys3, int nx, int ny, int nz) { +inline void 
phys2solver(double *vectSolver, const arr3_double vectPhys1, const arr3_double vectPhys2, const arr3_double vectPhys3, int nx, int ny, int nz) { for (register int i = 1; i < nx - 1; i++) for (register int j = 1; j < ny - 1; j++) for (register int k = 1; k < nz - 1; k++) { diff --git a/include/phdf5.h b/include/phdf5.h index 39dba569..462fe746 100644 --- a/include/phdf5.h +++ b/include/phdf5.h @@ -22,9 +22,9 @@ class PHDF5fileClass{ void CreatePHDF5file(double *L, int *dglob, int *dlocl, bool bp); void ClosePHDF5file(); void OpenPHDF5file(); - void ReadPHDF5dataset_double(string dataset, arr3_double& data); + void ReadPHDF5dataset_double(string dataset, arr3_double data); void ReadPHDF5param(); - int WritePHDF5dataset(string grpname, string datasetname, const_arr3_double& data, int nx, int ny, int nz); + int WritePHDF5dataset(string grpname, string datasetname, const_arr3_double data, int nx, int ny, int nz); int getPHDF5ndim(); int getPHDF5ncx(); diff --git a/inputoutput/phdf5.cpp b/inputoutput/phdf5.cpp index 5b8368ab..1496aa59 100644 --- a/inputoutput/phdf5.cpp +++ b/inputoutput/phdf5.cpp @@ -112,7 +112,7 @@ void PHDF5fileClass::ClosePHDF5file(){ } -int PHDF5fileClass::WritePHDF5dataset(string grpname, string datasetname, const_arr3_double& data, int nx, int ny, int nz){ +int PHDF5fileClass::WritePHDF5dataset(string grpname, string datasetname, const_arr3_double data, int nx, int ny, int nz){ /* -------------------------- */ /* Local variables and arrays */ @@ -266,7 +266,7 @@ void PHDF5fileClass::ReadPHDF5param(){ } -void PHDF5fileClass::ReadPHDF5dataset_double(string datasetname, arr3_double& data){ +void PHDF5fileClass::ReadPHDF5dataset_double(string datasetname, arr3_double data){ herr_t status; double *filedata; From 967631ae81cd5c9f25bdcc23f73628af92ea427c Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 9 Aug 2013 17:56:18 +0200 Subject: [PATCH 029/118] committing settings for large number of particles (for MIC) --- inputfiles/GEM.inp | 10 +++++----- 1 
file changed, 5 insertions(+), 5 deletions(-) diff --git a/inputfiles/GEM.inp b/inputfiles/GEM.inp index b268cac7..ba6173c2 100644 --- a/inputfiles/GEM.inp +++ b/inputfiles/GEM.inp @@ -47,8 +47,8 @@ y_center = 1. # Ly = simulation box length - y direction in m z_center = 1. # Lz = simulation box length - z direction in m L_square = .1 -nxc = 64 # nxc = number of cells - x direction -nyc = 64 # nyc = number of cells - y direction +nxc = 128 # nxc = number of cells - x direction +nyc = 128 # nyc = number of cells - y direction nzc = 1 # nzc = number of cells - z direction # %%%%%%%%%%%%%% PARTICLES %%%%%%%%%%%%%%%%% @@ -64,11 +64,11 @@ rhoINJECT = 1.0 1.0 # TrackParticleID[species] = 1=true, 0=false --> Assign ID to particles TrackParticleID = 0 0 # npcelx = number of particles per cell - Direction X -npcelx = 3 3 +npcelx = 8 8 # npcely = number of particles per cell - Direction Y -npcely = 3 3 +npcely = 8 8 # npcelz = number of particles per cell - Direction Z -npcelz = 3 3 +npcelz = 8 8 # qom = charge to mass ratio for different species qom = -256.0 1.0 # uth = thermal velocity for different species - Direction X From dc4a2dcb81a21b44de4b6eddb365cc74bce7b06c Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 12 Aug 2013 15:16:36 +0200 Subject: [PATCH 030/118] using arr4_double(nxn,nyn,nzn,10) instance to sum moments --- fields/EMfields3D.cpp | 339 ++++++++++++++++++++---------------------- include/Alloc.h | 8 + include/EMfields3D.h | 5 +- 3 files changed, 172 insertions(+), 180 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 93b18100..7d752707 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -191,11 +191,10 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : injFieldsRear = new injInfoFields(nxn, nyn, nzn); sizeMomentsArray = omp_thread_count(); - momentsArray = new Moments*[sizeMomentsArray]; + momentsArray = (arr4_double**) malloc(sizeof(void*)*sizeMomentsArray); for(int i=0;iinit(nxn,nyn,nzn); + 
momentsArray[i] = new arr4_double(nxn,nyn,nzn,10); } } @@ -220,16 +219,6 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop double const*const q = pcls.getQall(); // const int is = pcls.get_ns(); - double* rhons1d = &rhons[is][0][0][0]; - double* Jxs1d = &Jxs [is][0][0][0]; - double* Jys1d = &Jys [is][0][0][0]; - double* Jzs1d = &Jzs [is][0][0][0]; - double* pXXsn1d = &pXXsn[is][0][0][0]; - double* pXYsn1d = &pXYsn[is][0][0][0]; - double* pXZsn1d = &pXZsn[is][0][0][0]; - double* pYYsn1d = &pYYsn[is][0][0][0]; - double* pYZsn1d = &pYZsn[is][0][0][0]; - double* pZZsn1d = &pZZsn[is][0][0][0]; // const long long nop_ll = pcls.getNOP(); const int nop = pcls.getNOP(); @@ -242,19 +231,21 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop #pragma omp parallel { int thread_num = omp_get_thread_num(); - Moments& speciesMoments = fetch_momentsArray(thread_num); - speciesMoments.set_to_zero(); + arr4_double moments = fetch_momentsArray(thread_num); + moments.setall(0.); + //Moments& speciesMoments = fetch_momentsArray(thread_num); + //speciesMoments.set_to_zero(); // - arr3_double rho = speciesMoments.fetch_rho(); - arr3_double Jx = speciesMoments.fetch_Jx(); - arr3_double Jy = speciesMoments.fetch_Jy(); - arr3_double Jz = speciesMoments.fetch_Jz(); - arr3_double Pxx = speciesMoments.fetch_Pxx(); - arr3_double Pxy = speciesMoments.fetch_Pxy(); - arr3_double Pxz = speciesMoments.fetch_Pxz(); - arr3_double Pyy = speciesMoments.fetch_Pyy(); - arr3_double Pyz = speciesMoments.fetch_Pyz(); - arr3_double Pzz = speciesMoments.fetch_Pzz(); + //arr3_double rho = speciesMoments.fetch_rho(); + //arr3_double Jx = speciesMoments.fetch_Jx(); + //arr3_double Jy = speciesMoments.fetch_Jy(); + //arr3_double Jz = speciesMoments.fetch_Jz(); + //arr3_double Pxx = speciesMoments.fetch_Pxx(); + //arr3_double Pxy = speciesMoments.fetch_Pxy(); + //arr3_double Pxz = speciesMoments.fetch_Pxz(); + //arr3_double Pyy = 
speciesMoments.fetch_Pyy(); + //arr3_double Pyz = speciesMoments.fetch_Pyz(); + //arr3_double Pzz = speciesMoments.fetch_Pzz(); // The following loop is expensive, so it is wise to assume that the // compiler is stupid. Therefore we should on the one hand // expand things out and on the other hand avoid repeating computations. @@ -266,12 +257,24 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop const double ui=u[i]; const double vi=v[i]; const double wi=w[i]; - const double uui=ui*ui; - const double uvi=ui*vi; - const double uwi=ui*wi; - const double vvi=vi*vi; - const double vwi=vi*wi; - const double wwi=wi*wi; + //const double uui=ui*ui; + //const double uvi=ui*vi; + //const double uwi=ui*wi; + //const double vvi=vi*vi; + //const double vwi=vi*wi; + //const double wwi=wi*wi; + double velmoments[10]; + velmoments[0] = 1.; + velmoments[1] = ui; + velmoments[2] = vi; + velmoments[3] = wi; + velmoments[4] = ui*ui; + velmoments[5] = ui*vi; + velmoments[6] = ui*wi; + velmoments[7] = vi*vi; + velmoments[8] = vi*wi; + velmoments[9] = wi*wi; + // // compute the weights to distribute the moments // @@ -299,156 +302,136 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop const double weight101 = qi * xi1 * eta0 * zeta1 * invVOL; const double weight110 = qi * xi1 * eta1 * zeta0 * invVOL; const double weight111 = qi * xi1 * eta1 * zeta1 * invVOL; + + moments[ix ][iy ][iz ][0] += velmoments[0]*weight000; + moments[ix ][iy ][iz ][1] += velmoments[1]*weight000; + moments[ix ][iy ][iz ][2] += velmoments[2]*weight000; + moments[ix ][iy ][iz ][3] += velmoments[3]*weight000; + moments[ix ][iy ][iz ][4] += velmoments[4]*weight000; + moments[ix ][iy ][iz ][5] += velmoments[5]*weight000; + moments[ix ][iy ][iz ][6] += velmoments[6]*weight000; + moments[ix ][iy ][iz ][7] += velmoments[7]*weight000; + moments[ix ][iy ][iz ][8] += velmoments[8]*weight000; + moments[ix ][iy ][iz ][9] += velmoments[9]*weight000; + // + 
moments[ix ][iy ][iz-1][0] += velmoments[0]*weight001; + moments[ix ][iy ][iz-1][1] += velmoments[1]*weight001; + moments[ix ][iy ][iz-1][2] += velmoments[2]*weight001; + moments[ix ][iy ][iz-1][3] += velmoments[3]*weight001; + moments[ix ][iy ][iz-1][4] += velmoments[4]*weight001; + moments[ix ][iy ][iz-1][5] += velmoments[5]*weight001; + moments[ix ][iy ][iz-1][6] += velmoments[6]*weight001; + moments[ix ][iy ][iz-1][7] += velmoments[7]*weight001; + moments[ix ][iy ][iz-1][8] += velmoments[8]*weight001; + moments[ix ][iy ][iz-1][9] += velmoments[9]*weight001; + // + moments[ix ][iy-1][iz ][0] += velmoments[0]*weight010; + moments[ix ][iy-1][iz ][1] += velmoments[1]*weight010; + moments[ix ][iy-1][iz ][2] += velmoments[2]*weight010; + moments[ix ][iy-1][iz ][3] += velmoments[3]*weight010; + moments[ix ][iy-1][iz ][4] += velmoments[4]*weight010; + moments[ix ][iy-1][iz ][5] += velmoments[5]*weight010; + moments[ix ][iy-1][iz ][6] += velmoments[6]*weight010; + moments[ix ][iy-1][iz ][7] += velmoments[7]*weight010; + moments[ix ][iy-1][iz ][8] += velmoments[8]*weight010; + moments[ix ][iy-1][iz ][9] += velmoments[9]*weight010; + // + moments[ix ][iy-1][iz-1][0] += velmoments[0]*weight011; + moments[ix ][iy-1][iz-1][1] += velmoments[1]*weight011; + moments[ix ][iy-1][iz-1][2] += velmoments[2]*weight011; + moments[ix ][iy-1][iz-1][3] += velmoments[3]*weight011; + moments[ix ][iy-1][iz-1][4] += velmoments[4]*weight011; + moments[ix ][iy-1][iz-1][5] += velmoments[5]*weight011; + moments[ix ][iy-1][iz-1][6] += velmoments[6]*weight011; + moments[ix ][iy-1][iz-1][7] += velmoments[7]*weight011; + moments[ix ][iy-1][iz-1][8] += velmoments[8]*weight011; + moments[ix ][iy-1][iz-1][9] += velmoments[9]*weight011; + // + moments[ix-1][iy-1][iz ][0] += velmoments[0]*weight100; + moments[ix-1][iy-1][iz ][1] += velmoments[1]*weight100; + moments[ix-1][iy-1][iz ][2] += velmoments[2]*weight100; + moments[ix-1][iy-1][iz ][3] += velmoments[3]*weight100; + moments[ix-1][iy-1][iz ][4] += 
velmoments[4]*weight100; + moments[ix-1][iy-1][iz ][5] += velmoments[5]*weight100; + moments[ix-1][iy-1][iz ][6] += velmoments[6]*weight100; + moments[ix-1][iy-1][iz ][7] += velmoments[7]*weight100; + moments[ix-1][iy-1][iz ][8] += velmoments[8]*weight100; + moments[ix-1][iy-1][iz ][9] += velmoments[9]*weight100; // - // use the weight to distribute the moments + moments[ix-1][iy ][iz-1][0] += velmoments[0]*weight101; + moments[ix-1][iy ][iz-1][1] += velmoments[1]*weight101; + moments[ix-1][iy ][iz-1][2] += velmoments[2]*weight101; + moments[ix-1][iy ][iz-1][3] += velmoments[3]*weight101; + moments[ix-1][iy ][iz-1][4] += velmoments[4]*weight101; + moments[ix-1][iy ][iz-1][5] += velmoments[5]*weight101; + moments[ix-1][iy ][iz-1][6] += velmoments[6]*weight101; + moments[ix-1][iy ][iz-1][7] += velmoments[7]*weight101; + moments[ix-1][iy ][iz-1][8] += velmoments[8]*weight101; + moments[ix-1][iy ][iz-1][9] += velmoments[9]*weight101; // - // add charge density - //speciesMoments.addRho(weight, ix, iy, iz); - rho[ix ][iy ][iz ] += weight000; - rho[ix ][iy ][iz-1] += weight001; - rho[ix ][iy-1][iz ] += weight010; - rho[ix ][iy-1][iz-1] += weight011; - rho[ix-1][iy ][iz ] += weight100; - rho[ix-1][iy ][iz-1] += weight101; - rho[ix-1][iy-1][iz ] += weight110; - rho[ix-1][iy-1][iz-1] += weight111; - // add current density - X - //speciesMoments.addJx(temp, ix, iy, iz); - Jx[ix ][iy ][iz ] += ui*weight000; - Jx[ix ][iy ][iz-1] += ui*weight001; - Jx[ix ][iy-1][iz ] += ui*weight010; - Jx[ix ][iy-1][iz-1] += ui*weight011; - Jx[ix-1][iy ][iz ] += ui*weight100; - Jx[ix-1][iy ][iz-1] += ui*weight101; - Jx[ix-1][iy-1][iz ] += ui*weight110; - Jx[ix-1][iy-1][iz-1] += ui*weight111; - // add current density - Y - //speciesMoments.addJy(temp, ix, iy, iz); - Jy[ix ][iy ][iz ] += vi*weight000; - Jy[ix ][iy ][iz-1] += vi*weight001; - Jy[ix ][iy-1][iz ] += vi*weight010; - Jy[ix ][iy-1][iz-1] += vi*weight011; - Jy[ix-1][iy ][iz ] += vi*weight100; - Jy[ix-1][iy ][iz-1] += vi*weight101; - 
Jy[ix-1][iy-1][iz ] += vi*weight110; - Jy[ix-1][iy-1][iz-1] += vi*weight111; - // add current density - Z - //speciesMoments.addJz(temp, ix, iy, iz); - Jz[ix ][iy ][iz ] += wi*weight000; - Jz[ix ][iy ][iz-1] += wi*weight001; - Jz[ix ][iy-1][iz ] += wi*weight010; - Jz[ix ][iy-1][iz-1] += wi*weight011; - Jz[ix-1][iy ][iz ] += wi*weight100; - Jz[ix-1][iy ][iz-1] += wi*weight101; - Jz[ix-1][iy-1][iz ] += wi*weight110; - Jz[ix-1][iy-1][iz-1] += wi*weight111; - // Pxx - add pressure tensor - //speciesMoments.addPxx(temp, ix, iy, iz); - Pxx[ix ][iy ][iz ] += uui*weight000; - Pxx[ix ][iy ][iz-1] += uui*weight001; - Pxx[ix ][iy-1][iz ] += uui*weight010; - Pxx[ix ][iy-1][iz-1] += uui*weight011; - Pxx[ix-1][iy ][iz ] += uui*weight100; - Pxx[ix-1][iy ][iz-1] += uui*weight101; - Pxx[ix-1][iy-1][iz ] += uui*weight110; - Pxx[ix-1][iy-1][iz-1] += uui*weight111; - // Pxy - add pressure tensor - //speciesMoments.addPxy(temp, ix, iy, iz); - Pxy[ix ][iy ][iz ] += uvi*weight000; - Pxy[ix ][iy ][iz-1] += uvi*weight001; - Pxy[ix ][iy-1][iz ] += uvi*weight010; - Pxy[ix ][iy-1][iz-1] += uvi*weight011; - Pxy[ix-1][iy ][iz ] += uvi*weight100; - Pxy[ix-1][iy ][iz-1] += uvi*weight101; - Pxy[ix-1][iy-1][iz ] += uvi*weight110; - Pxy[ix-1][iy-1][iz-1] += uvi*weight111; - // Pxz - add pressure tensor - //speciesMoments.addPxz(temp, ix, iy, iz); - Pxz[ix ][iy ][iz ] += uwi*weight000; - Pxz[ix ][iy ][iz-1] += uwi*weight001; - Pxz[ix ][iy-1][iz ] += uwi*weight010; - Pxz[ix ][iy-1][iz-1] += uwi*weight011; - Pxz[ix-1][iy ][iz ] += uwi*weight100; - Pxz[ix-1][iy ][iz-1] += uwi*weight101; - Pxz[ix-1][iy-1][iz ] += uwi*weight110; - Pxz[ix-1][iy-1][iz-1] += uwi*weight111; - // Pyy - add pressure tensor - //speciesMoments.addPyy(temp, ix, iy, iz); - Pyy[ix ][iy ][iz ] += vvi*weight000; - Pyy[ix ][iy ][iz-1] += vvi*weight001; - Pyy[ix ][iy-1][iz ] += vvi*weight010; - Pyy[ix ][iy-1][iz-1] += vvi*weight011; - Pyy[ix-1][iy ][iz ] += vvi*weight100; - Pyy[ix-1][iy ][iz-1] += vvi*weight101; - Pyy[ix-1][iy-1][iz ] 
+= vvi*weight110; - Pyy[ix-1][iy-1][iz-1] += vvi*weight111; - // Pyz - add pressure tensor - //speciesMoments.addPyz(temp, ix, iy, iz); - Pyz[ix ][iy ][iz ] += vwi*weight000; - Pyz[ix ][iy ][iz-1] += vwi*weight001; - Pyz[ix ][iy-1][iz ] += vwi*weight010; - Pyz[ix ][iy-1][iz-1] += vwi*weight011; - Pyz[ix-1][iy ][iz ] += vwi*weight100; - Pyz[ix-1][iy ][iz-1] += vwi*weight101; - Pyz[ix-1][iy-1][iz ] += vwi*weight110; - Pyz[ix-1][iy-1][iz-1] += vwi*weight111; - // Pzz - add pressure tensor - //speciesMoments.addPzz(temp, ix, iy, iz); - Pzz[ix ][iy ][iz ] += wwi*weight000; - Pzz[ix ][iy ][iz-1] += wwi*weight001; - Pzz[ix ][iy-1][iz ] += wwi*weight010; - Pzz[ix ][iy-1][iz-1] += wwi*weight011; - Pzz[ix-1][iy ][iz ] += wwi*weight100; - Pzz[ix-1][iy ][iz-1] += wwi*weight101; - Pzz[ix-1][iy-1][iz ] += wwi*weight110; - Pzz[ix-1][iy-1][iz-1] += wwi*weight111; + moments[ix-1][iy-1][iz ][0] += velmoments[0]*weight110; + moments[ix-1][iy-1][iz ][1] += velmoments[1]*weight110; + moments[ix-1][iy-1][iz ][2] += velmoments[2]*weight110; + moments[ix-1][iy-1][iz ][3] += velmoments[3]*weight110; + moments[ix-1][iy-1][iz ][4] += velmoments[4]*weight110; + moments[ix-1][iy-1][iz ][5] += velmoments[5]*weight110; + moments[ix-1][iy-1][iz ][6] += velmoments[6]*weight110; + moments[ix-1][iy-1][iz ][7] += velmoments[7]*weight110; + moments[ix-1][iy-1][iz ][8] += velmoments[8]*weight110; + moments[ix-1][iy-1][iz ][9] += velmoments[9]*weight110; + // + moments[ix-1][iy-1][iz-1][0] += velmoments[0]*weight111; + moments[ix-1][iy-1][iz-1][1] += velmoments[1]*weight111; + moments[ix-1][iy-1][iz-1][2] += velmoments[2]*weight111; + moments[ix-1][iy-1][iz-1][3] += velmoments[3]*weight111; + moments[ix-1][iy-1][iz-1][4] += velmoments[4]*weight111; + moments[ix-1][iy-1][iz-1][5] += velmoments[5]*weight111; + moments[ix-1][iy-1][iz-1][6] += velmoments[6]*weight111; + moments[ix-1][iy-1][iz-1][7] += velmoments[7]*weight111; + moments[ix-1][iy-1][iz-1][8] += velmoments[8]*weight111; + 
moments[ix-1][iy-1][iz-1][9] += velmoments[9]*weight111; + // + //for(int jx=0;jx<2;jx++) + //for(int jy=0;jy<2;jy++) + //for(int jz=0;jz<2;jz++) + //for(int m=0;m<10;m++) + //{ + // moments[ix-jx][iy-jy][iz-jz][m] += velmoments[m]*weight[jx][jy][jz]; + //} } - // The following way is an equivalent reduction but less - // efficient for a large number of threads. - // - //#pragma omp critical - //addToSpeciesMoments(speciesMoments,is); - // - // Instead we split up the reduction tasks. - // - // One-dimensional array access is presumably - // more efficient on poor compilers. - // - double* rho1d = &rho[0][0][0]; - double* Jx1d = &Jx [0][0][0]; - double* Jy1d = &Jy [0][0][0]; - double* Jz1d = &Jz [0][0][0]; - double* Pxx1d = &Pxx[0][0][0]; - double* Pxy1d = &Pxy[0][0][0]; - double* Pxz1d = &Pxz[0][0][0]; - double* Pyy1d = &Pyy[0][0][0]; - double* Pyz1d = &Pyz[0][0][0]; - double* Pzz1d = &Pzz[0][0][0]; + // We split up the reduction tasks. // - assert_eq(speciesMoments.get_nx(), nxn); - assert_eq(speciesMoments.get_ny(), nyn); - assert_eq(speciesMoments.get_nz(), nzn); const int numel = nxn*nyn*nzn; - #pragma omp critical - for(int i=0;ifree(); } - delete [] momentsArray; + free(momentsArray); } diff --git a/include/Alloc.h b/include/Alloc.h index 5c3c5b5d..46c68fd9 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -608,6 +608,13 @@ namespace iPic3D void set(size_t n4,size_t n3,size_t n2,size_t n1, type value) { check_idx_bounds(n4,n3,n2,n1); arr4[n4][n3][n2][n1] = value; } #endif + protected: + void setall(double val) + { + #pragma omp for + for(int i=0;i @@ -644,6 +651,7 @@ namespace iPic3D { const_array_ref4::set(n4,n3,n2,n1, value); } void free(){ delArray4((type****)arr4); } type**** fetch_arr4(){ return (type****) arr4; } + void setall(double val) { const_array_ref4::setall(val); } //bool verify_dims(size_t s4, size_t s3, size_t s2, size_t s1){ // if(s4==S4 && s3==S3 && s2==S2 && s1==S1) return true; // Wprintf("%d==%d && %d==%d && %d==%d && %d==%d failed", 
diff --git a/include/EMfields3D.h b/include/EMfields3D.h index 335a45ea..d0653090 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -256,7 +256,7 @@ class EMfields3D // :public Field double getBenergy(); /*! fetch array for summing moments of thread i */ - Moments& fetch_momentsArray(int i){ + arr4_double fetch_momentsArray(int i){ assert_le(0,i); assert_le(i,sizeMomentsArray); return *momentsArray[i]; @@ -384,7 +384,8 @@ class EMfields3D // :public Field array3_double divC; /* temporary arrays for summing moments */ int sizeMomentsArray; - Moments **momentsArray; + //Moments **momentsArray; + arr4_double** momentsArray; // ******************************************************************************* From 991d97d997e1699b4a677b3c5e0dcf72cec757df Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 12 Aug 2013 20:23:22 +0200 Subject: [PATCH 031/118] fixed bugs introduced in previous commit --- fields/EMfields3D.cpp | 561 +++++++++++++++++++++++++++++------------- include/Alloc.h | 4 +- include/EMfields3D.h | 13 +- 3 files changed, 397 insertions(+), 181 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 7d752707..7cfea8f1 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -5,6 +5,7 @@ #include "TimeTasks.h" #include "Moments.h" #include "ompdefs.h" +#include "debug.h" /*! 
constructor */ // @@ -191,10 +192,13 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : injFieldsRear = new injInfoFields(nxn, nyn, nzn); sizeMomentsArray = omp_thread_count(); - momentsArray = (arr4_double**) malloc(sizeof(void*)*sizeMomentsArray); + momentsArray = new Moments*[sizeMomentsArray]; + moments10 = (arr4_double**) malloc(sizeof(void*)*sizeMomentsArray); for(int i=0;iinit(nxn,nyn,nzn); + moments10[i] = new arr4_double(nxn,nyn,nzn,10); } } @@ -219,6 +223,20 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop double const*const q = pcls.getQall(); // const int is = pcls.get_ns(); + bool bmoments10 = true; + bool b10moments = false; + + // if b10moments + double* rhons1d = &rhons[is][0][0][0]; + double* Jxs1d = &Jxs [is][0][0][0]; + double* Jys1d = &Jys [is][0][0][0]; + double* Jzs1d = &Jzs [is][0][0][0]; + double* pXXsn1d = &pXXsn[is][0][0][0]; + double* pXYsn1d = &pXYsn[is][0][0][0]; + double* pXZsn1d = &pXZsn[is][0][0][0]; + double* pYYsn1d = &pYYsn[is][0][0][0]; + double* pYZsn1d = &pYZsn[is][0][0][0]; + double* pZZsn1d = &pZZsn[is][0][0][0]; // const long long nop_ll = pcls.getNOP(); const int nop = pcls.getNOP(); @@ -231,21 +249,21 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop #pragma omp parallel { int thread_num = omp_get_thread_num(); - arr4_double moments = fetch_momentsArray(thread_num); + Moments& speciesMoments = fetch_momentsArray(thread_num); + speciesMoments.set_to_zero(); + arr4_double moments = fetch_moments10(thread_num); moments.setall(0.); - //Moments& speciesMoments = fetch_momentsArray(thread_num); - //speciesMoments.set_to_zero(); // - //arr3_double rho = speciesMoments.fetch_rho(); - //arr3_double Jx = speciesMoments.fetch_Jx(); - //arr3_double Jy = speciesMoments.fetch_Jy(); - //arr3_double Jz = speciesMoments.fetch_Jz(); - //arr3_double Pxx = speciesMoments.fetch_Pxx(); - //arr3_double Pxy = speciesMoments.fetch_Pxy(); - //arr3_double Pxz = 
speciesMoments.fetch_Pxz(); - //arr3_double Pyy = speciesMoments.fetch_Pyy(); - //arr3_double Pyz = speciesMoments.fetch_Pyz(); - //arr3_double Pzz = speciesMoments.fetch_Pzz(); + arr3_double rho = speciesMoments.fetch_rho(); + arr3_double Jx = speciesMoments.fetch_Jx(); + arr3_double Jy = speciesMoments.fetch_Jy(); + arr3_double Jz = speciesMoments.fetch_Jz(); + arr3_double Pxx = speciesMoments.fetch_Pxx(); + arr3_double Pxy = speciesMoments.fetch_Pxy(); + arr3_double Pxz = speciesMoments.fetch_Pxz(); + arr3_double Pyy = speciesMoments.fetch_Pyy(); + arr3_double Pyz = speciesMoments.fetch_Pyz(); + arr3_double Pzz = speciesMoments.fetch_Pzz(); // The following loop is expensive, so it is wise to assume that the // compiler is stupid. Therefore we should on the one hand // expand things out and on the other hand avoid repeating computations. @@ -257,23 +275,23 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop const double ui=u[i]; const double vi=v[i]; const double wi=w[i]; - //const double uui=ui*ui; - //const double uvi=ui*vi; - //const double uwi=ui*wi; - //const double vvi=vi*vi; - //const double vwi=vi*wi; - //const double wwi=wi*wi; + const double uui=ui*ui; + const double uvi=ui*vi; + const double uwi=ui*wi; + const double vvi=vi*vi; + const double vwi=vi*wi; + const double wwi=wi*wi; double velmoments[10]; velmoments[0] = 1.; velmoments[1] = ui; velmoments[2] = vi; velmoments[3] = wi; - velmoments[4] = ui*ui; - velmoments[5] = ui*vi; - velmoments[6] = ui*wi; - velmoments[7] = vi*vi; - velmoments[8] = vi*wi; - velmoments[9] = wi*wi; + velmoments[4] = uui; + velmoments[5] = uvi; + velmoments[6] = uwi; + velmoments[7] = vvi; + velmoments[8] = vwi; + velmoments[9] = wwi; // // compute the weights to distribute the moments @@ -303,135 +321,326 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop const double weight110 = qi * xi1 * eta1 * zeta0 * invVOL; const double weight111 = qi * xi1 * eta1 * 
zeta1 * invVOL; - moments[ix ][iy ][iz ][0] += velmoments[0]*weight000; - moments[ix ][iy ][iz ][1] += velmoments[1]*weight000; - moments[ix ][iy ][iz ][2] += velmoments[2]*weight000; - moments[ix ][iy ][iz ][3] += velmoments[3]*weight000; - moments[ix ][iy ][iz ][4] += velmoments[4]*weight000; - moments[ix ][iy ][iz ][5] += velmoments[5]*weight000; - moments[ix ][iy ][iz ][6] += velmoments[6]*weight000; - moments[ix ][iy ][iz ][7] += velmoments[7]*weight000; - moments[ix ][iy ][iz ][8] += velmoments[8]*weight000; - moments[ix ][iy ][iz ][9] += velmoments[9]*weight000; - // - moments[ix ][iy ][iz-1][0] += velmoments[0]*weight001; - moments[ix ][iy ][iz-1][1] += velmoments[1]*weight001; - moments[ix ][iy ][iz-1][2] += velmoments[2]*weight001; - moments[ix ][iy ][iz-1][3] += velmoments[3]*weight001; - moments[ix ][iy ][iz-1][4] += velmoments[4]*weight001; - moments[ix ][iy ][iz-1][5] += velmoments[5]*weight001; - moments[ix ][iy ][iz-1][6] += velmoments[6]*weight001; - moments[ix ][iy ][iz-1][7] += velmoments[7]*weight001; - moments[ix ][iy ][iz-1][8] += velmoments[8]*weight001; - moments[ix ][iy ][iz-1][9] += velmoments[9]*weight001; - // - moments[ix ][iy-1][iz ][0] += velmoments[0]*weight010; - moments[ix ][iy-1][iz ][1] += velmoments[1]*weight010; - moments[ix ][iy-1][iz ][2] += velmoments[2]*weight010; - moments[ix ][iy-1][iz ][3] += velmoments[3]*weight010; - moments[ix ][iy-1][iz ][4] += velmoments[4]*weight010; - moments[ix ][iy-1][iz ][5] += velmoments[5]*weight010; - moments[ix ][iy-1][iz ][6] += velmoments[6]*weight010; - moments[ix ][iy-1][iz ][7] += velmoments[7]*weight010; - moments[ix ][iy-1][iz ][8] += velmoments[8]*weight010; - moments[ix ][iy-1][iz ][9] += velmoments[9]*weight010; - // - moments[ix ][iy-1][iz-1][0] += velmoments[0]*weight011; - moments[ix ][iy-1][iz-1][1] += velmoments[1]*weight011; - moments[ix ][iy-1][iz-1][2] += velmoments[2]*weight011; - moments[ix ][iy-1][iz-1][3] += velmoments[3]*weight011; - moments[ix ][iy-1][iz-1][4] += 
velmoments[4]*weight011; - moments[ix ][iy-1][iz-1][5] += velmoments[5]*weight011; - moments[ix ][iy-1][iz-1][6] += velmoments[6]*weight011; - moments[ix ][iy-1][iz-1][7] += velmoments[7]*weight011; - moments[ix ][iy-1][iz-1][8] += velmoments[8]*weight011; - moments[ix ][iy-1][iz-1][9] += velmoments[9]*weight011; - // - moments[ix-1][iy-1][iz ][0] += velmoments[0]*weight100; - moments[ix-1][iy-1][iz ][1] += velmoments[1]*weight100; - moments[ix-1][iy-1][iz ][2] += velmoments[2]*weight100; - moments[ix-1][iy-1][iz ][3] += velmoments[3]*weight100; - moments[ix-1][iy-1][iz ][4] += velmoments[4]*weight100; - moments[ix-1][iy-1][iz ][5] += velmoments[5]*weight100; - moments[ix-1][iy-1][iz ][6] += velmoments[6]*weight100; - moments[ix-1][iy-1][iz ][7] += velmoments[7]*weight100; - moments[ix-1][iy-1][iz ][8] += velmoments[8]*weight100; - moments[ix-1][iy-1][iz ][9] += velmoments[9]*weight100; - // - moments[ix-1][iy ][iz-1][0] += velmoments[0]*weight101; - moments[ix-1][iy ][iz-1][1] += velmoments[1]*weight101; - moments[ix-1][iy ][iz-1][2] += velmoments[2]*weight101; - moments[ix-1][iy ][iz-1][3] += velmoments[3]*weight101; - moments[ix-1][iy ][iz-1][4] += velmoments[4]*weight101; - moments[ix-1][iy ][iz-1][5] += velmoments[5]*weight101; - moments[ix-1][iy ][iz-1][6] += velmoments[6]*weight101; - moments[ix-1][iy ][iz-1][7] += velmoments[7]*weight101; - moments[ix-1][iy ][iz-1][8] += velmoments[8]*weight101; - moments[ix-1][iy ][iz-1][9] += velmoments[9]*weight101; - // - moments[ix-1][iy-1][iz ][0] += velmoments[0]*weight110; - moments[ix-1][iy-1][iz ][1] += velmoments[1]*weight110; - moments[ix-1][iy-1][iz ][2] += velmoments[2]*weight110; - moments[ix-1][iy-1][iz ][3] += velmoments[3]*weight110; - moments[ix-1][iy-1][iz ][4] += velmoments[4]*weight110; - moments[ix-1][iy-1][iz ][5] += velmoments[5]*weight110; - moments[ix-1][iy-1][iz ][6] += velmoments[6]*weight110; - moments[ix-1][iy-1][iz ][7] += velmoments[7]*weight110; - moments[ix-1][iy-1][iz ][8] += 
velmoments[8]*weight110; - moments[ix-1][iy-1][iz ][9] += velmoments[9]*weight110; + if(bmoments10) + { + moments[ix ][iy ][iz ][0] += velmoments[0]*weight000; + moments[ix ][iy ][iz ][1] += velmoments[1]*weight000; + moments[ix ][iy ][iz ][2] += velmoments[2]*weight000; + moments[ix ][iy ][iz ][3] += velmoments[3]*weight000; + moments[ix ][iy ][iz ][4] += velmoments[4]*weight000; + moments[ix ][iy ][iz ][5] += velmoments[5]*weight000; + moments[ix ][iy ][iz ][6] += velmoments[6]*weight000; + moments[ix ][iy ][iz ][7] += velmoments[7]*weight000; + moments[ix ][iy ][iz ][8] += velmoments[8]*weight000; + moments[ix ][iy ][iz ][9] += velmoments[9]*weight000; + + moments[ix ][iy ][iz-1][0] += velmoments[0]*weight001; + moments[ix ][iy ][iz-1][1] += velmoments[1]*weight001; + moments[ix ][iy ][iz-1][2] += velmoments[2]*weight001; + moments[ix ][iy ][iz-1][3] += velmoments[3]*weight001; + moments[ix ][iy ][iz-1][4] += velmoments[4]*weight001; + moments[ix ][iy ][iz-1][5] += velmoments[5]*weight001; + moments[ix ][iy ][iz-1][6] += velmoments[6]*weight001; + moments[ix ][iy ][iz-1][7] += velmoments[7]*weight001; + moments[ix ][iy ][iz-1][8] += velmoments[8]*weight001; + moments[ix ][iy ][iz-1][9] += velmoments[9]*weight001; + + moments[ix ][iy-1][iz ][0] += velmoments[0]*weight010; + moments[ix ][iy-1][iz ][1] += velmoments[1]*weight010; + moments[ix ][iy-1][iz ][2] += velmoments[2]*weight010; + moments[ix ][iy-1][iz ][3] += velmoments[3]*weight010; + moments[ix ][iy-1][iz ][4] += velmoments[4]*weight010; + moments[ix ][iy-1][iz ][5] += velmoments[5]*weight010; + moments[ix ][iy-1][iz ][6] += velmoments[6]*weight010; + moments[ix ][iy-1][iz ][7] += velmoments[7]*weight010; + moments[ix ][iy-1][iz ][8] += velmoments[8]*weight010; + moments[ix ][iy-1][iz ][9] += velmoments[9]*weight010; + + moments[ix ][iy-1][iz-1][0] += velmoments[0]*weight011; + moments[ix ][iy-1][iz-1][1] += velmoments[1]*weight011; + moments[ix ][iy-1][iz-1][2] += velmoments[2]*weight011; + moments[ix 
][iy-1][iz-1][3] += velmoments[3]*weight011; + moments[ix ][iy-1][iz-1][4] += velmoments[4]*weight011; + moments[ix ][iy-1][iz-1][5] += velmoments[5]*weight011; + moments[ix ][iy-1][iz-1][6] += velmoments[6]*weight011; + moments[ix ][iy-1][iz-1][7] += velmoments[7]*weight011; + moments[ix ][iy-1][iz-1][8] += velmoments[8]*weight011; + moments[ix ][iy-1][iz-1][9] += velmoments[9]*weight011; + + moments[ix-1][iy ][iz ][0] += velmoments[0]*weight100; + moments[ix-1][iy ][iz ][1] += velmoments[1]*weight100; + moments[ix-1][iy ][iz ][2] += velmoments[2]*weight100; + moments[ix-1][iy ][iz ][3] += velmoments[3]*weight100; + moments[ix-1][iy ][iz ][4] += velmoments[4]*weight100; + moments[ix-1][iy ][iz ][5] += velmoments[5]*weight100; + moments[ix-1][iy ][iz ][6] += velmoments[6]*weight100; + moments[ix-1][iy ][iz ][7] += velmoments[7]*weight100; + moments[ix-1][iy ][iz ][8] += velmoments[8]*weight100; + moments[ix-1][iy ][iz ][9] += velmoments[9]*weight100; + + moments[ix-1][iy ][iz-1][0] += velmoments[0]*weight101; + moments[ix-1][iy ][iz-1][1] += velmoments[1]*weight101; + moments[ix-1][iy ][iz-1][2] += velmoments[2]*weight101; + moments[ix-1][iy ][iz-1][3] += velmoments[3]*weight101; + moments[ix-1][iy ][iz-1][4] += velmoments[4]*weight101; + moments[ix-1][iy ][iz-1][5] += velmoments[5]*weight101; + moments[ix-1][iy ][iz-1][6] += velmoments[6]*weight101; + moments[ix-1][iy ][iz-1][7] += velmoments[7]*weight101; + moments[ix-1][iy ][iz-1][8] += velmoments[8]*weight101; + moments[ix-1][iy ][iz-1][9] += velmoments[9]*weight101; + + moments[ix-1][iy-1][iz ][0] += velmoments[0]*weight110; + moments[ix-1][iy-1][iz ][1] += velmoments[1]*weight110; + moments[ix-1][iy-1][iz ][2] += velmoments[2]*weight110; + moments[ix-1][iy-1][iz ][3] += velmoments[3]*weight110; + moments[ix-1][iy-1][iz ][4] += velmoments[4]*weight110; + moments[ix-1][iy-1][iz ][5] += velmoments[5]*weight110; + moments[ix-1][iy-1][iz ][6] += velmoments[6]*weight110; + moments[ix-1][iy-1][iz ][7] += 
velmoments[7]*weight110; + moments[ix-1][iy-1][iz ][8] += velmoments[8]*weight110; + moments[ix-1][iy-1][iz ][9] += velmoments[9]*weight110; + + moments[ix-1][iy-1][iz-1][0] += velmoments[0]*weight111; + moments[ix-1][iy-1][iz-1][1] += velmoments[1]*weight111; + moments[ix-1][iy-1][iz-1][2] += velmoments[2]*weight111; + moments[ix-1][iy-1][iz-1][3] += velmoments[3]*weight111; + moments[ix-1][iy-1][iz-1][4] += velmoments[4]*weight111; + moments[ix-1][iy-1][iz-1][5] += velmoments[5]*weight111; + moments[ix-1][iy-1][iz-1][6] += velmoments[6]*weight111; + moments[ix-1][iy-1][iz-1][7] += velmoments[7]*weight111; + moments[ix-1][iy-1][iz-1][8] += velmoments[8]*weight111; + moments[ix-1][iy-1][iz-1][9] += velmoments[9]*weight111; + + //double weight[2][2][2]; + //weight[0][0][0]=weight000; + //weight[0][0][1]=weight001; + //weight[0][1][0]=weight010; + //weight[0][1][1]=weight011; + //weight[1][0][0]=weight100; + //weight[1][0][1]=weight101; + //weight[1][1][0]=weight110; + //weight[1][1][1]=weight111; + //// + //for(int jx=0;jx<2;jx++) + //for(int jy=0;jy<2;jy++) + //for(int jz=0;jz<2;jz++) + //for(int m=0;m<10;m++) + //{ + // moments[ix-jx][iy-jy][iz-jz][m] += velmoments[m]*weight[jx][jy][jz]; + //} + } + + if(b10moments) + { + // use the weight to distribute the moments + // + // add charge density + //speciesMoments.addRho(weight, ix, iy, iz); + rho[ix ][iy ][iz ] += weight000; + rho[ix ][iy ][iz-1] += weight001; + rho[ix ][iy-1][iz ] += weight010; + rho[ix ][iy-1][iz-1] += weight011; + rho[ix-1][iy ][iz ] += weight100; + rho[ix-1][iy ][iz-1] += weight101; + rho[ix-1][iy-1][iz ] += weight110; + rho[ix-1][iy-1][iz-1] += weight111; + // add current density - X + //speciesMoments.addJx(temp, ix, iy, iz); + Jx[ix ][iy ][iz ] += ui*weight000; + Jx[ix ][iy ][iz-1] += ui*weight001; + Jx[ix ][iy-1][iz ] += ui*weight010; + Jx[ix ][iy-1][iz-1] += ui*weight011; + Jx[ix-1][iy ][iz ] += ui*weight100; + Jx[ix-1][iy ][iz-1] += ui*weight101; + Jx[ix-1][iy-1][iz ] += ui*weight110; + 
Jx[ix-1][iy-1][iz-1] += ui*weight111; + // add current density - Y + //speciesMoments.addJy(temp, ix, iy, iz); + Jy[ix ][iy ][iz ] += vi*weight000; + Jy[ix ][iy ][iz-1] += vi*weight001; + Jy[ix ][iy-1][iz ] += vi*weight010; + Jy[ix ][iy-1][iz-1] += vi*weight011; + Jy[ix-1][iy ][iz ] += vi*weight100; + Jy[ix-1][iy ][iz-1] += vi*weight101; + Jy[ix-1][iy-1][iz ] += vi*weight110; + Jy[ix-1][iy-1][iz-1] += vi*weight111; + // add current density - Z + //speciesMoments.addJz(temp, ix, iy, iz); + Jz[ix ][iy ][iz ] += wi*weight000; + Jz[ix ][iy ][iz-1] += wi*weight001; + Jz[ix ][iy-1][iz ] += wi*weight010; + Jz[ix ][iy-1][iz-1] += wi*weight011; + Jz[ix-1][iy ][iz ] += wi*weight100; + Jz[ix-1][iy ][iz-1] += wi*weight101; + Jz[ix-1][iy-1][iz ] += wi*weight110; + Jz[ix-1][iy-1][iz-1] += wi*weight111; + // Pxx - add pressure tensor + //speciesMoments.addPxx(temp, ix, iy, iz); + Pxx[ix ][iy ][iz ] += uui*weight000; + Pxx[ix ][iy ][iz-1] += uui*weight001; + Pxx[ix ][iy-1][iz ] += uui*weight010; + Pxx[ix ][iy-1][iz-1] += uui*weight011; + Pxx[ix-1][iy ][iz ] += uui*weight100; + Pxx[ix-1][iy ][iz-1] += uui*weight101; + Pxx[ix-1][iy-1][iz ] += uui*weight110; + Pxx[ix-1][iy-1][iz-1] += uui*weight111; + // Pxy - add pressure tensor + //speciesMoments.addPxy(temp, ix, iy, iz); + Pxy[ix ][iy ][iz ] += uvi*weight000; + Pxy[ix ][iy ][iz-1] += uvi*weight001; + Pxy[ix ][iy-1][iz ] += uvi*weight010; + Pxy[ix ][iy-1][iz-1] += uvi*weight011; + Pxy[ix-1][iy ][iz ] += uvi*weight100; + Pxy[ix-1][iy ][iz-1] += uvi*weight101; + Pxy[ix-1][iy-1][iz ] += uvi*weight110; + Pxy[ix-1][iy-1][iz-1] += uvi*weight111; + // Pxz - add pressure tensor + //speciesMoments.addPxz(temp, ix, iy, iz); + Pxz[ix ][iy ][iz ] += uwi*weight000; + Pxz[ix ][iy ][iz-1] += uwi*weight001; + Pxz[ix ][iy-1][iz ] += uwi*weight010; + Pxz[ix ][iy-1][iz-1] += uwi*weight011; + Pxz[ix-1][iy ][iz ] += uwi*weight100; + Pxz[ix-1][iy ][iz-1] += uwi*weight101; + Pxz[ix-1][iy-1][iz ] += uwi*weight110; + Pxz[ix-1][iy-1][iz-1] += uwi*weight111; 
+ // Pyy - add pressure tensor + //speciesMoments.addPyy(temp, ix, iy, iz); + Pyy[ix ][iy ][iz ] += vvi*weight000; + Pyy[ix ][iy ][iz-1] += vvi*weight001; + Pyy[ix ][iy-1][iz ] += vvi*weight010; + Pyy[ix ][iy-1][iz-1] += vvi*weight011; + Pyy[ix-1][iy ][iz ] += vvi*weight100; + Pyy[ix-1][iy ][iz-1] += vvi*weight101; + Pyy[ix-1][iy-1][iz ] += vvi*weight110; + Pyy[ix-1][iy-1][iz-1] += vvi*weight111; + // Pyz - add pressure tensor + //speciesMoments.addPyz(temp, ix, iy, iz); + Pyz[ix ][iy ][iz ] += vwi*weight000; + Pyz[ix ][iy ][iz-1] += vwi*weight001; + Pyz[ix ][iy-1][iz ] += vwi*weight010; + Pyz[ix ][iy-1][iz-1] += vwi*weight011; + Pyz[ix-1][iy ][iz ] += vwi*weight100; + Pyz[ix-1][iy ][iz-1] += vwi*weight101; + Pyz[ix-1][iy-1][iz ] += vwi*weight110; + Pyz[ix-1][iy-1][iz-1] += vwi*weight111; + // Pzz - add pressure tensor + //speciesMoments.addPzz(temp, ix, iy, iz); + Pzz[ix ][iy ][iz ] += wwi*weight000; + Pzz[ix ][iy ][iz-1] += wwi*weight001; + Pzz[ix ][iy-1][iz ] += wwi*weight010; + Pzz[ix ][iy-1][iz-1] += wwi*weight011; + Pzz[ix-1][iy ][iz ] += wwi*weight100; + Pzz[ix-1][iy ][iz-1] += wwi*weight101; + Pzz[ix-1][iy-1][iz ] += wwi*weight110; + Pzz[ix-1][iy-1][iz-1] += wwi*weight111; + } + + if(b10moments && bmoments10) + { + // check work + for(int jx=0;jx<2;jx++) + for(int jy=0;jy<2;jy++) + for(int jz=0;jz<2;jz++) + { + //dprintf("gothere"); + //dprintf("%24.16f == rho[ix-jx][iy-jy][iz-jz]", rho[ix-jx][iy-jy][iz-jz]); + //dprintf("%24.16f == moments[ix-jx][iy-jy][iz-jz][0]", moments[ix-jx][iy-jy][iz-jz][0]); + assert_eq(rho[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][0]); + //dprintf("gothere"); + assert_eq(Jx [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][1]); + assert_eq(Jy [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][2]); + assert_eq(Jz [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][3]); + assert_eq(Pxx[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][4]); + assert_eq(Pxy[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][5]); + 
assert_eq(Pxz[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][6]); + assert_eq(Pyy[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][7]); + assert_eq(Pyz[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][8]); + assert_eq(Pzz[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][9]); + } + } + } + // split up the reduction tasks. + // + if(b10moments) + { // - moments[ix-1][iy-1][iz-1][0] += velmoments[0]*weight111; - moments[ix-1][iy-1][iz-1][1] += velmoments[1]*weight111; - moments[ix-1][iy-1][iz-1][2] += velmoments[2]*weight111; - moments[ix-1][iy-1][iz-1][3] += velmoments[3]*weight111; - moments[ix-1][iy-1][iz-1][4] += velmoments[4]*weight111; - moments[ix-1][iy-1][iz-1][5] += velmoments[5]*weight111; - moments[ix-1][iy-1][iz-1][6] += velmoments[6]*weight111; - moments[ix-1][iy-1][iz-1][7] += velmoments[7]*weight111; - moments[ix-1][iy-1][iz-1][8] += velmoments[8]*weight111; - moments[ix-1][iy-1][iz-1][9] += velmoments[9]*weight111; + // One-dimensional array access is presumably + // more efficient on poor compilers. 
+ double* rho1d = &rho[0][0][0]; + double* Jx1d = &Jx [0][0][0]; + double* Jy1d = &Jy [0][0][0]; + double* Jz1d = &Jz [0][0][0]; + double* Pxx1d = &Pxx[0][0][0]; + double* Pxy1d = &Pxy[0][0][0]; + double* Pxz1d = &Pxz[0][0][0]; + double* Pyy1d = &Pyy[0][0][0]; + double* Pyz1d = &Pyz[0][0][0]; + double* Pzz1d = &Pzz[0][0][0]; + //// + assert_eq(speciesMoments.get_nx(), nxn); + assert_eq(speciesMoments.get_ny(), nyn); + assert_eq(speciesMoments.get_nz(), nzn); + const int numel = nxn*nyn*nzn; + #pragma omp critical + for(int i=0;ifree(); + delete momentsArray[i]; + moments10[i]->free(); } - free(momentsArray); + delete [] momentsArray; + free(moments10); } diff --git a/include/Alloc.h b/include/Alloc.h index 46c68fd9..8e589321 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -609,7 +609,7 @@ namespace iPic3D { check_idx_bounds(n4,n3,n2,n1); arr4[n4][n3][n2][n1] = value; } #endif protected: - void setall(double val) + void setall(type val) { #pragma omp for for(int i=0;i::set(n4,n3,n2,n1, value); } void free(){ delArray4((type****)arr4); } type**** fetch_arr4(){ return (type****) arr4; } - void setall(double val) { const_array_ref4::setall(val); } + void setall(type val) { const_array_ref4::setall(val); } //bool verify_dims(size_t s4, size_t s3, size_t s2, size_t s1){ // if(s4==S4 && s3==S3 && s2==S2 && s1==S1) return true; // Wprintf("%d==%d && %d==%d && %d==%d && %d==%d failed", diff --git a/include/EMfields3D.h b/include/EMfields3D.h index d0653090..b2105b0b 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -119,7 +119,7 @@ class EMfields3D // :public Field void communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, VirtualTopology3D * vct); void sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); /*! add accumulated moments to the moments for a given species */ - void addToSpeciesMoments(const Moments & in, int is); + //void addToSpeciesMoments(const Moments & in, int is); /*! 
add an amount of charge density to charge density field at node X,Y,Z */ void addRho(double weight[][2][2], int X, int Y, int Z, int is); /*! add an amount of current density - direction X to current density field at node X,Y,Z */ @@ -256,11 +256,16 @@ class EMfields3D // :public Field double getBenergy(); /*! fetch array for summing moments of thread i */ - arr4_double fetch_momentsArray(int i){ + Moments& fetch_momentsArray(int i){ assert_le(0,i); assert_le(i,sizeMomentsArray); return *momentsArray[i]; } + arr4_double fetch_moments10(int i){ + assert_le(0,i); + assert_le(i,sizeMomentsArray); + return *moments10[i]; + } /*! print electromagnetic fields info */ void print(void) const; @@ -384,8 +389,8 @@ class EMfields3D // :public Field array3_double divC; /* temporary arrays for summing moments */ int sizeMomentsArray; - //Moments **momentsArray; - arr4_double** momentsArray; + Moments **momentsArray; + arr4_double** moments10; // ******************************************************************************* From 9f0e972bceb05c789343c0ba903388e0e58e1b1b Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 12 Aug 2013 21:15:18 +0200 Subject: [PATCH 032/118] turned off use of moments10 introduced in dc4a2dcb81: not working with -openmp for some unclear reason that I don't have time to isolate. 
--- fields/EMfields3D.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 7cfea8f1..93e4cf9c 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -223,8 +223,8 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop double const*const q = pcls.getQall(); // const int is = pcls.get_ns(); - bool bmoments10 = true; - bool b10moments = false; + bool bmoments10 = false; + bool b10moments = true; // turn on doing it the old way // if b10moments double* rhons1d = &rhons[is][0][0][0]; @@ -536,6 +536,16 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop Pzz[ix-1][iy-1][iz-1] += wwi*weight111; } + // why on earth do I observe the following: + // * without openmp, b10moments and bmoments10 gives same results, + // * b10moments gives same results with and without openmp, and + // * bmoments10 gives wrong results when I use openmp. + // I'm using Moments class and moments array exactly the same way + // as far as openmp is concerned... To isolate the problem, + // gradually morph Moments class until implemented via arr4_double... + // Problem in constructor? 
+ // + // if(b10moments && bmoments10) { // check work From 07a20af3f2092aec1718f95b3ae415ff4952ad34 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 24 Sep 2013 11:42:03 +0200 Subject: [PATCH 033/118] issue #46: implemented cmake support for MIC cross-compile (Xeon and Xeon Phi) --- CMakeLists.txt | 59 ++++++++++-- cmake/cmake_template.cmake.XeonPhi | 15 +++ fields/EMfields3D.cpp | 141 ++++++++++++++++++++++++++++- inputfiles/Random.inp | 56 +++++++----- main/iPic3Dlib.cpp | 7 ++ 5 files changed, 248 insertions(+), 30 deletions(-) create mode 100755 cmake/cmake_template.cmake.XeonPhi diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e94ab5e..fc94308a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,9 @@ -cmake_minimum_required(VERSION 2.8.8) - +cmake_minimum_required(VERSION 2.8.8) +# compiler set in ../cmake/cmake_template.cmake.XeonPhi +message ("for Xeon Phi:") +message ("cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.XeonPhi") +#message ("for Xeon:") +#message ("cmake .. 
-DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.Xeon") # # Project declaration # @@ -13,17 +17,40 @@ project(iPic3D) set(EXECUTABLE_OUTPUT_PATH work/${CMAKE_BUILD_TYPE}) set(LIBRARY_OUTPUT_PATH lib) +# +# Set compiler flags per system +# +if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "k1om") ## Xeon Phi + set(CMAKE_CXX_FLAGS "-O3 -openmp -fno-exceptions -vec-report -mmic") + #set(CMAKE_CXX_FLAGS "-openmp -g -mmic") # set flags for Xeon Phi, totalview +elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") ## Xeon + set(CMAKE_CXX_COMPILER "icpc") + set(CMAKE_CXX_FLAGS "-O3 -openmp -fno-exceptions -vec-report") +else() + set(CMAKE_CXX_FLAGS "-O3") +endif() + # # Find third class libraries # +if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "k1om") ## Xeon Phi + set(CMAKE_PREFIX_PATH /opt/hdf5/1.8.10-patch1-mic) + set(VARIOUS_LIB /opt/intel/lib/mic) + set (EXTRA_LIBS ${VARIOUS_LIB}/libimf.so ${VARIOUS_LIB}/libsvml.so ${VARIOUS_LIB}/libirng.so ${VARIOUS_LIB}/libintlc.so ${MPELIB}) +else() + find_package(MPI REQUIRED) + set(EXTRA_LIBS "") +endif() find_package(HDF5 COMPONENTS HL C REQUIRED) -find_package(MPI REQUIRED) +message("HDF5_INCLUDE_DIRS is ${HDF5_INCLUDE_DIRS}") + # # include and lib directories # +# include_directories: files there are accessible to the project include_directories( include ${HDF5_INCLUDE_DIRS} @@ -33,6 +60,7 @@ include_directories( link_directories( ${HDF5_LIBRARY_DIRS} ${MPI_LIBRARY_DIRS} + ${EXTRA_LIBS} ) # @@ -91,10 +119,12 @@ add_executable( iPic3D.cpp ) +#build iPic as a library also +#libiPic3Dlib.so in folder lib add_library( - iPic3Dlib - SHARED - ${inc_files} + iPic3Dlib #name of the library + SHARED #type of the library + ${inc_files} # stuff to build the library ${src_files} ) @@ -107,9 +137,26 @@ target_link_libraries( ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} ${MPI_LIBRARIES} + ${EXTRA_LIBS} ) target_link_libraries( iPic3D iPic3Dlib ) + + +## to save the executable in the folder where the CMakeLists.txt file is, i.e. 
CMAKE_CURRENT_SOURCE_DIR +set_target_properties(iPic3D PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +## debug releases have a _d appended to the executable +set_target_properties(iPic3D PROPERTIES DEBUG_POSTFIX "_d") + + +message("Which system am I compiling for:") +message("CMAKE_SYSTEM_PROCESSOR is ${CMAKE_SYSTEM_PROCESSOR}") + +message("Compiler & compiler flags:") +message("CMAKE_CXX_COMPILER is ${CMAKE_CXX_COMPILER}") +message("CMAKE_CXX_FLAGS is ${CMAKE_CXX_FLAGS}") + diff --git a/cmake/cmake_template.cmake.XeonPhi b/cmake/cmake_template.cmake.XeonPhi new file mode 100755 index 00000000..fbd0d0c3 --- /dev/null +++ b/cmake/cmake_template.cmake.XeonPhi @@ -0,0 +1,15 @@ +SET(CMAKE_SYSTEM_NAME Linux) +SET(CMAKE_SYSTEM_PROCESSOR k1om) +SET(CMAKE_SYSTEM_VERSION 1) + +# specify the cross compiler +SET(CMAKE_C_COMPILER icc) +SET(CMAKE_CXX_COMPILER icpc) +SET(MPI_C_COMPILER mpiicc) +SET(CMAKE_CXX_COMPILER mpiicpc) +#SET(CMAKE_CXX_FLAGS "-openmp -O3 -mmic") +SET(_CMAKE_TOOLCHAIN_PREFIX x86_64-k1om-linux-) + +# where is the target environment +#SET(CMAKE_FIND_ROOT_PATH /usr/linux-k1om-4.7) +SET(CMAKE_FIND_ROOT_PATH /opt/modules/knc/mic) #path to the Intel(R) Manycore Platform Software Stack, as in http://software.intel.com/en-us/articles/cross-compilation-for-intel-xeon-phi-coprocessor-with-cmake diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 93e4cf9c..d878b3a8 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -2275,8 +2275,9 @@ void EMfields3D::initGEMnoPert(VirtualTopology3D * vct, Grid * grid, Collective init(vct, grid, col); // use the fields from restart file } } - -void EMfields3D::initRandomField(VirtualTopology3D * vct, Grid * grid, Collective *col) { +/* old init, Random problem */ +#if 0 +void EMfields3D::initRandomFieldOld(VirtualTopology3D * vct, Grid * grid, Collective *col) { double **modes_seed = newArr2(double, 7, 7); if (restart1 == 0) { // initialize @@ -2336,7 +2337,7 @@ void 
EMfields3D::initRandomField(VirtualTopology3D * vct, Grid * grid, Collectiv Bzn[i][j][k] += B0x * cos(grid->getXN(i, j, k) * kx + grid->getYN(i, j, k) * ky + 2.0 * M_PI * modes_seed[m + 3][n + 3]); } - /* for (int m=1; m < 4; m++) for (int n=1; n < 4; n++){ kx=2.0*M_PI*m/Lx; ky=2.0*M_PI*n/Ly; Bxn[i][j][k] += B0x/kx*cos(grid->getXN(i,j,k)*kx+grid->getYN(i,j,k)*ky+2.0*M_PI*phixy); Byn[i][j][k] += B0x/ky*cos(grid->getXN(i,j,k)*kx+grid->getYN(i,j,k)*ky+2.0*M_PI*phixy); Bzn[i][j][k] += B0x/(kx+ky)*cos(grid->getXN(i,j,k)*kx+grid->getYN(i,j,k)*ky+2.0*M_PI*phiz); } for(int n=1; n < 4; n++){ ky=2.0*M_PI*n/Ly; Bxn[i][j][k] += B0x/(2.0*M_PI/Lx)*cos(grid->getYN(i,j,k)*ky+2.0*M_PI*phix); } for(int m=1; m < 4; m++){ kx=2.0*M_PI*m/Lx; Byn[i][j][k] += B0x/(2.0*M_PI/Ly)*cos(grid->getXN(i,j,k)*kx+2.0*M_PI*phiy); } */ + } // communicate ghost communicateNodeBC(nxn, nyn, nzn, Bxn, col->bcBx[0],col->bcBx[1],col->bcBx[2],col->bcBx[3],col->bcBx[4],col->bcBx[5], vct); @@ -2358,6 +2359,140 @@ void EMfields3D::initRandomField(VirtualTopology3D * vct, Grid * grid, Collectiv } delArr2(modes_seed, 7); } +#endif + +// new init, random problem +void EMfields3D::initRandomField(VirtualTopology3D *vct, Grid *grid, Collective *col) +{ + double **modes_seed = newArr2(double, 7, 7); + if (restart1 ==0){ + // initialize + if (vct->getCartesian_rank() ==0){ + cout << "------------------------------------------" << endl; + cout << "Initialize GEM Challenge with Pertubation" << endl; + cout << "------------------------------------------" << endl; + cout << "B0x = " << B0x << endl; + cout << "B0y = " << B0y << endl; + cout << "B0z = " << B0z << endl; + cout << "Delta (current sheet thickness) = " << delta << endl; + for (int i=0; i < ns; i++){ + cout << "rho species " << i <<" = " << rhoINIT[i]; + if (DriftSpecies[i]) + cout << " DRIFTING " << endl; + else + cout << " BACKGROUND " << endl; + } + cout << "-------------------------" << endl; + } + double kx; + double ky; + + /* stringstream num_proc; + 
num_proc << vct->getCartesian_rank() ; + string cqsat = SaveDirName + "/RandomNumbers" + num_proc.str() + ".txt"; + ofstream my_file(cqsat.c_str(), fstream::binary); + for (int m=-3; m < 4; m++) + for (int n=-3; n < 4; n++){ + modes_seed[m+3][n+3] = rand() / (double) RAND_MAX; + my_file <<"modes_seed["<< m+3<<"][" << "\t" << n+3 << "] = " << modes_seed[m+3][n+3] << endl; + } + my_file.close(); + */ + modes_seed[0][0] = 0.532767; + modes_seed[0][1] = 0.218959; + modes_seed[0][2] = 0.0470446; + modes_seed[0][3] = 0.678865; + modes_seed[0][4] = 0.679296; + modes_seed[0][5] = 0.934693; + modes_seed[0][6] = 0.383502; + modes_seed[1][0] = 0.519416; + modes_seed[1][1] = 0.830965; + modes_seed[1][2] = 0.0345721; + modes_seed[1][3] = 0.0534616; + modes_seed[1][4] = 0.5297; + modes_seed[1][5] = 0.671149; + modes_seed[1][6] = 0.00769819; + modes_seed[2][0] = 0.383416; + modes_seed[2][1] = 0.0668422; + modes_seed[2][2] = 0.417486; + modes_seed[2][3] = 0.686773; + modes_seed[2][4] = 0.588977; + modes_seed[2][5] = 0.930436; + modes_seed[2][6] = 0.846167; + modes_seed[3][0] = 0.526929; + modes_seed[3][1] = 0.0919649; + modes_seed[3][2] = 0.653919; + modes_seed[3][3] = 0.415999; + modes_seed[3][4] = 0.701191; + modes_seed[3][5] = 0.910321; + modes_seed[3][6] = 0.762198; + modes_seed[4][0] = 0.262453; + modes_seed[4][1] = 0.0474645; + modes_seed[4][2] = 0.736082; + modes_seed[4][3] = 0.328234; + modes_seed[4][4] = 0.632639; + modes_seed[4][5] = 0.75641; + modes_seed[4][6] = 0.991037; + modes_seed[5][0] = 0.365339; + modes_seed[5][1] = 0.247039; + modes_seed[5][2] = 0.98255; + modes_seed[5][3] = 0.72266; + modes_seed[5][4] = 0.753356; + modes_seed[5][5] = 0.651519; + modes_seed[5][6] = 0.0726859; + modes_seed[6][0] = 0.631635; + modes_seed[6][1] = 0.884707; + modes_seed[6][2] = 0.27271; + modes_seed[6][3] = 0.436411; + modes_seed[6][4] = 0.766495; + modes_seed[6][5] = 0.477732; + modes_seed[6][6] = 0.237774; + + for (int i=0; i < nxn; i++) + for (int j=0; j < nyn; j++) + for (int 
k=0; k < nzn; k++){ + // initialize the density for species + for (int is=0; is < ns; is++){ + rhons[is][i][j][k] = rhoINIT[is]/FourPI; + } + // electric field + Ex[i][j][k] = 0.0; + Ey[i][j][k] = 0.0; + Ez[i][j][k] = 0.0; + // Magnetic field + Bxn[i][j][k] = 0.0; + Byn[i][j][k] = 0.0; + Bzn[i][j][k] = B0z; + for (int m=-3; m < 4; m++) + for (int n=-3; n < 4; n++){ + + kx=2.0*M_PI*m/Lx; + ky=2.0*M_PI*n/Ly; + Bxn[i][j][k] += -B0x*ky*cos(grid->getXN(i,j,k)*kx+grid->getYN(i,j,k)*ky+2.0*M_PI*modes_seed[m+3][n+3]); + Byn[i][j][k] += B0x*kx*cos(grid->getXN(i,j,k)*kx+grid->getYN(i,j,k)*ky+2.0*M_PI*modes_seed[m+3][n+3]); + // Bzn[i][j][k] += B0x*cos(grid->getXN(i,j,k)*kx+grid->getYN(i,j,k)*ky+2.0*M_PI*modes_seed[m+3][n+3]); + } + } + // communicate ghost + communicateNodeBC(nxn, nyn, nzn, Bxn, 1, 1, 2, 2, 1, 1, vct); + communicateNodeBC(nxn, nyn, nzn, Byn, 1, 1, 1, 1, 1, 1, vct); + communicateNodeBC(nxn, nyn, nzn, Bzn, 1, 1, 2, 2, 1, 1, vct); + // initialize B on centers + grid->interpN2C(Bxc, Bxn); + grid->interpN2C(Byc, Byn); + grid->interpN2C(Bzc, Bzn); + // communicate ghost + communicateCenterBC(nxc, nyc, nzc, Bxc, 2, 2, 2, 2, 2, 2, vct); + communicateCenterBC(nxc, nyc, nzc, Byc, 1, 1, 1, 1, 1, 1, vct); + communicateCenterBC(nxc, nyc, nzc, Bzc, 2, 2, 2, 2, 2, 2, vct); + for (int is=0 ; is<ns; is++) grid->interpN2C(rhocs,is,rhons); + } else { + init(vct,grid, col); // use the fields from restart file + } + delArr2(modes_seed, 7); + } + /*! 
Init Force Free (JxB=0) */ void EMfields3D::initForceFree(VirtualTopology3D * vct, Grid * grid, Collective *col) { diff --git a/inputfiles/Random.inp b/inputfiles/Random.inp index 03209eb2..b48ef3a4 100644 --- a/inputfiles/Random.inp +++ b/inputfiles/Random.inp @@ -9,15 +9,26 @@ RestartDirName = data # set the maximum number of particles allocated NpMaxNpRatio = 3.0 +# New flags: +Case = RandomCase #goToDefault #RandomCase # Select the case +PoissonCorrection = no # Poisson correction +WriteMethod = default # Output method [ default | Parallel ] +SimulationName = RandomCase # Simulation name for the output + # %%%%%%%%%%%%%%%%%%% Magnetic Reconnection %%%%%%%%%%%%%%%%%% -B0x = 0.001 +B0x = 0.03 B0y = 0.00 -B0z = 0.00 +B0z = 0.03 delta = 0.5 +# External magnetic field parameters: +B1x = 0.000 +B1y = 0.000 +B1z = 0.000 + # %%%%%%%%%%%%%%%%%%% TIME %%%%%%%%%%%%%%%%%% -dt = 0.5 # dt = time step -ncycles = 2001 # cycles +dt = 0.05 # dt = time step +ncycles = 2001 #!!! # cycles th = 1.0 # th = decentering parameter c = 1.0 # c = light speed @@ -26,8 +37,8 @@ Smooth = 0.5 # Smoothing value (5-points stencil) # %%%%%%%%%%%%%%%%%% BOX SIZE %%%%%%%%%%%%%%% -Lx = 10.0 # Lx = simulation box length - x direction -Ly = 10.0 # Ly = simulation box length - y direction +Lx = 20.0 # Lx = simulation box length - x direction +Ly = 20.0 # Ly = simulation box length - y direction Lz = 1.0 # Lz = simulation box length - z direction x_center = 1. # Lx = simulation box length - x direction in m @@ -44,32 +55,35 @@ nzc = 1 # nzc = number of cells - z direction # 0 = electrons # 1 = protons # 2,3,4,5,... 
= ions -ns = 4 +ns = 2 # Initial density (make sure you are neutral) -rhoINIT = 0.01 0.01 0.2 0.2 +rhoINIT = 1.0 1.0 +# Injection density (make sure you are neutral) +rhoINJECT = 0.0 0.0 # TrackParticleID[species] = 1=true, 0=false --> Assign ID to particles -TrackParticleID = 0 0 0 0 +TrackParticleID = 0 0 # npcelx = number of particles per cell - Direction X -npcelx = 8 8 8 8 +npcelx = 3 3 # npcely = number of particles per cell - Direction Y -npcely = 8 8 8 8 +npcely = 3 3 # npcelz = number of particles per cell - Direction Z -npcelz = 1 1 1 1 +#####npcelz = 3 3 +npcelz = 1 1 # qom = charge to mass ratio for different species -qom = -256.0 1.0 -256.0 1.0 +qom = -256.0 1.0 # uth = thermal velocity for different species - Direction X -uth = 0.045 0.0063 0.045 0.0063 +uth = 0.045 0.0063 # vth = thermal velocity for different species - Direction Y -vth = 0.045 0.0063 0.045 0.0063 +vth = 0.045 0.0063 # wth = thermal velocity for different species - Direction Z -wth = 0.045 0.0063 0.045 0.0063 +wth = 0.045 0.0063 # u0 = drift velocity - Direction X -u0 = 0.0 0.0 0.0 0.0 +u0 = 0.0 0.0 # v0 = drift velocity - Direction Y -v0 = 0.0 0.0 0.0 0.0 +v0 = 0.0 0.0 # w0 = drift velocity - Direction Z -w0 = 0.000001 -0.000256 0.0 0.0 - +w0 = 0.0 0.0 +#w0 = 0.00325 -0.01624 0.0 0.0 # &&&&&&&&&&&& boundary conditions &&&&&&&&&&&&&&& # PHI Electrostatic Potential @@ -118,7 +132,7 @@ w0 = 0.000001 -0.000256 0.0 0.0 # mover predictor corrector iteration NiterMover = 3 # Output for field - FieldOutputCycle = 10 + FieldOutputCycle = 100 # Output for particles if 1 it doesnt save particles data ParticlesOutputCycle = 1 # restart cycle diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index b0cb1426..dd731c9d 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -69,6 +69,13 @@ int c_Solver::Init(int argc, char **argv) { else if (col->getCase()=="BATSRUS") EMf->initBATSRUS(vct,grid,col); #endif else if (col->getCase()=="Dipole") EMf->initDipole(vct,grid,col); + else if 
(col->getCase()=="RandomCase") { + EMf->initRandomField(vct,grid,col); + if (myrank==0) { + cout << "Case is " << col->getCase() <<"\n"; + cout <<"total # of particle per cell is " << col->getNpcel(0) << "\n"; + } + } else { if (myrank==0) { cout << " =========================================================== " << endl; From 28665ed6889034a8e75b30d9b9bcc2e340af1502 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 26 Sep 2013 14:22:17 +0200 Subject: [PATCH 034/118] issue #10: XLEN divides nxc should be enforced --- grids/Grid3DCU.cpp | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index 755f3096..5e7c3e2b 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -4,24 +4,23 @@ /*! constructor */ Grid3DCU::Grid3DCU(CollectiveIO * col, VirtualTopology3D * vct) { - // FOR TESTS - this must be uncommented next // int get_rank(); // if(!get_rank()) - // { - // fflush(stdout); - // bool xerror = false; - // bool yerror = false; - // bool zerror = false; - // if((col->getNxc()) % (vct->getXLEN())) xerror=true; - // if((col->getNyc()) % (vct->getYLEN())) yerror=true; - // if((col->getNzc()) % (vct->getZLEN())) zerror=true; - // if(xerror) printf("!!!ERROR: XLEN=%d does not divide nxc=%d\n", vct->getXLEN(),col->getNxc()); - // if(yerror) printf("!!!ERROR: YLEN=%d does not divide nyc=%d\n", vct->getYLEN(),col->getNyc()); - // if(zerror) printf("!!!ERROR: ZLEN=%d does not divide nzc=%d\n", vct->getZLEN(),col->getNzc()); - // fflush(stdout); - // bool error = xerror||yerror||zerror; - // if(error) exit(1); - // } + { + fflush(stdout); + bool xerror = false; + bool yerror = false; + bool zerror = false; + if((col->getNxc()) % (vct->getXLEN())) xerror=true; + if((col->getNyc()) % (vct->getYLEN())) yerror=true; + if((col->getNzc()) % (vct->getZLEN())) zerror=true; + if(xerror) printf("!!!ERROR: XLEN=%d does not divide nxc=%d\n", vct->getXLEN(),col->getNxc()); + if(yerror) 
printf("!!!ERROR: YLEN=%d does not divide nyc=%d\n", vct->getYLEN(),col->getNyc()); + if(zerror) printf("!!!ERROR: ZLEN=%d does not divide nzc=%d\n", vct->getZLEN(),col->getNzc()); + fflush(stdout); + bool error = xerror||yerror||zerror; + if(error) exit(1); + } // add 2 for the guard cells nxc = (col->getNxc()) / (vct->getXLEN()) + 2; nyc = (col->getNyc()) / (vct->getYLEN()) + 2; From 8b81a53ea9b33f5de5f4d462dafa8e7621c7b759 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 26 Sep 2013 14:23:47 +0200 Subject: [PATCH 035/118] issue #47: created communication/VCtopology3D.cpp to speed recompile for XLEN --- communication/VCtopology3D.cpp | 118 +++++++++++++ include/VCtopology3D.h | 296 +++------------------------------ 2 files changed, 145 insertions(+), 269 deletions(-) create mode 100644 communication/VCtopology3D.cpp diff --git a/communication/VCtopology3D.cpp b/communication/VCtopology3D.cpp new file mode 100644 index 00000000..5d0da864 --- /dev/null +++ b/communication/VCtopology3D.cpp @@ -0,0 +1,118 @@ +#include "mpi.h" +#include "Alloc.h" +#include "VCtopology3D.h" +#include + +using std::cout; +using std::endl; + +/** DEFINE THE Topology HERE, setting XLEN,YLEN,ZLEN */ +VCtopology3D::VCtopology3D() { + // ******************************************* + // ******************************************* + // change these values to change the topology + XLEN = 2; + YLEN = 2; + ZLEN = 1; + nprocs = XLEN * YLEN * ZLEN; + // here you have to set the topology for the fields + PERIODICX = true; + PERIODICY = false; + PERIODICZ = true; + // here you have to set the topology for the Particles + PERIODICX_P = true; + PERIODICY_P = false; + PERIODICZ_P = true; + // ******************************************* + // ******************************************* + XDIR = 0; + YDIR = 1; + ZDIR = 2; + RIGHT = 1; + LEFT = -1; + + reorder = 1; + + divisions[0] = XLEN; + divisions[1] = YLEN; + divisions[2] = ZLEN; + + periods[0] = PERIODICX; + periods[1] = PERIODICY; + periods[2] = 
PERIODICZ; + + periods_P[0] = PERIODICX_P; + periods_P[1] = PERIODICY_P; + periods_P[2] = PERIODICZ_P; + + + cVERBOSE = false; // communication verbose ? + +} + + + + + +/** Within CART_COMM, processes find about their new rank numbers, their cartesian coordinates, + and their neighbors */ +void VCtopology3D::setup_vctopology(MPI_Comm old_comm) { + // create a matrix with ranks, and neighbours for fields + MPI_Cart_create(old_comm, 3, divisions, periods, reorder, &CART_COMM); + // create a matrix with ranks, and neighbours for Particles + MPI_Cart_create(old_comm, 3, divisions, periods_P, reorder, &CART_COMM_P); + // field Communicator + if (CART_COMM != MPI_COMM_NULL) { + MPI_Comm_rank(CART_COMM, &cartesian_rank); + MPI_Cart_coords(CART_COMM, cartesian_rank, 3, coordinates); + + MPI_Cart_shift(CART_COMM, XDIR, RIGHT, &xleft_neighbor, &xright_neighbor); + MPI_Cart_shift(CART_COMM, YDIR, RIGHT, &yleft_neighbor, &yright_neighbor); + MPI_Cart_shift(CART_COMM, ZDIR, RIGHT, &zleft_neighbor, &zright_neighbor); + } + else { + // EXCEPTION + cout << "A process is trown away from the new topology for fields. VCtopology3D.h" << endl; + } + // Particles Communicator + if (CART_COMM_P != MPI_COMM_NULL) { + MPI_Comm_rank(CART_COMM_P, &cartesian_rank); + MPI_Cart_coords(CART_COMM_P, cartesian_rank, 3, coordinates); + + MPI_Cart_shift(CART_COMM_P, XDIR, RIGHT, &xleft_neighbor_P, &xright_neighbor_P); + MPI_Cart_shift(CART_COMM_P, YDIR, RIGHT, &yleft_neighbor_P, &yright_neighbor_P); + MPI_Cart_shift(CART_COMM_P, ZDIR, RIGHT, &zleft_neighbor_P, &zright_neighbor_P); + } + else { + // EXCEPTION + cout << "A process is trown away from the new topology for Particles. 
VCtopology3D.h" << endl; + } + +} +/** destructor */ +VCtopology3D::~VCtopology3D() { + +} +/** print topology info */ +void VCtopology3D::Print() { + cout << endl; + cout << "Virtual Cartesian Processors Topology" << endl; + cout << "-------------------------------------" << endl; + cout << "Processors grid: " << XLEN << "x" << YLEN << "x" << ZLEN << endl; + cout << "Periodicity Field X: " << periods[0] << endl; + cout << "Periodicity Field Y: " << periods[1] << endl; + cout << "Periodicity Field z: " << periods[2] << endl; + cout << "Periodicity Particles X: " << periods_P[0] << endl; + cout << "Periodicity Particles Y: " << periods_P[1] << endl; + cout << "Periodicity Particles z: " << periods_P[2] << endl; + cout << endl; +} +/** print cartesian rank of neighbors and coordinate of process */ +void VCtopology3D::PrintMapping() { + cout << endl; + cout << "Mapping of process " << cartesian_rank << endl; + cout << "----------------------" << endl; + cout << "Coordinates: X = " << coordinates[0] << "; Y = " << coordinates[1] << "; Z = " << coordinates[2] << endl; + cout << "Neighbors: xLeft = " << xleft_neighbor << "; xRight = " << xright_neighbor << "; yLeft = " << yleft_neighbor << "; yRight = " << yright_neighbor << "; zLeft = " << zleft_neighbor << "; zRight = " << zright_neighbor << endl; + cout << endl; +} diff --git a/include/VCtopology3D.h b/include/VCtopology3D.h index c7e2cc06..2bab31ed 100644 --- a/include/VCtopology3D.h +++ b/include/VCtopology3D.h @@ -14,15 +14,7 @@ developers : Stefano Markidis, Giovanni Lapenta #ifndef VCtopology3D_H #define VCtopology3D_H -#include "mpi.h" #include "VirtualTopology3D.h" -#include "Alloc.h" -#include - - - -using std::cout; -using std::endl; /** * @@ -48,56 +40,33 @@ class VCtopology3D:public VirtualTopology3D { void Print(); /** Print the mapping of topology */ void PrintMapping(); - /** get XLEN */ - int getXLEN(); - /** get YLEN */ - int getYLEN(); - /** get ZLEN */ - int getZLEN(); - /** get nprocs */ - int 
getNprocs(); - /** get periodicity on boundaries - DIRECTION X*/ - bool getPERIODICX(); - /** get periodicity on boundaries - DIRECTION Y*/ - bool getPERIODICY(); - /** get periodicity on boundaries - DIRECTION Z*/ - bool getPERIODICZ(); - /** get the cartesian rank of the process */ - int getCartesian_rank(); - /** get the cartesian rank of XLEFT neighbor */ - int getXleft_neighbor(); - /** get the cartesian rank of XRIGHT neighbor */ - int getXright_neighbor(); - /** get the cartesian rank of YLEFT neighbor */ - int getYleft_neighbor(); - /** get the cartesian rank of YRIGHT neighbor */ - int getYright_neighbor(); - /** get the cartesian rank of ZLEFT neighbor */ - int getZleft_neighbor(); - /** get the cartesian rank of ZRIGHT neighbor */ - int getZright_neighbor(); - /** get the cartesian rank of XLEFT neighbor */ - int getXleft_neighbor_P(); - /** get the cartesian rank of XRIGHT neighbor */ - int getXright_neighbor_P(); - /** get the cartesian rank of YLEFT neighbor */ - int getYleft_neighbor_P(); - /** get the cartesian rank of YRIGHT neighbor */ - int getYright_neighbor_P(); - /** get the cartesian rank of ZLEFT neighbor */ - int getZleft_neighbor_P(); - /** get the cartesian rank of ZRIGHT neighbor */ - int getZright_neighbor_P(); - /** get the coordinates in dir direction of process*/ - int getCoordinates(int dir); - /** get the coordinates of process*/ - int *getCoordinates(); - /** get Periodicity condition in dir direction */ - int getPeriods(int dir); - /** if cVERBOSE == true, print to the screen all the comunication */ - bool getcVERBOSE(); - /** get the MPI communicator */ - MPI_Comm getComm(); + + int getXLEN() { return (XLEN); } + int getYLEN() { return (YLEN); } + int getZLEN() { return (ZLEN); } + int getNprocs() { return (nprocs); } + bool getPERIODICX() { return (PERIODICX); } + bool getPERIODICY() { return (PERIODICY); } + bool getPERIODICZ() { return (PERIODICZ); } + int getCartesian_rank() { return (cartesian_rank); } + int 
getXleft_neighbor() { return (xleft_neighbor); } + int getXright_neighbor() { return (xright_neighbor); } + int getYleft_neighbor() { return (yleft_neighbor); } + int getYright_neighbor() { return (yright_neighbor); } + int getZleft_neighbor() { return (zleft_neighbor); } + int getZright_neighbor() { return (zright_neighbor); } + int getXleft_neighbor_P() { return (xleft_neighbor_P); } + int getXright_neighbor_P() { return (xright_neighbor_P); } + int getYleft_neighbor_P() { return (yleft_neighbor_P); } + int getYright_neighbor_P() { return (yright_neighbor_P); } + int getZleft_neighbor_P() { return (zleft_neighbor_P); } + int getZright_neighbor_P() { return (zright_neighbor_P); } + bool getcVERBOSE() { return (cVERBOSE); } + int getCoordinates(int dir) { return (coordinates[dir]); } + int *getCoordinates() { return (coordinates); } + int getPeriods(int dir) { return (periods[dir]); } + MPI_Comm getComm(){ return (CART_COMM); } + private: /** New communicator with virtual cartesian topology */ @@ -178,215 +147,4 @@ class VCtopology3D:public VirtualTopology3D { bool cVERBOSE; }; -/** DEFINE THE Topology HERE, setting XLEN,YLEN,ZLEN */ -inline VCtopology3D::VCtopology3D() { - // ******************************************* - // ******************************************* - // change these values to change the topology - XLEN = 2; - YLEN = 2; - ZLEN = 1; - nprocs = XLEN * YLEN * ZLEN; - // here you have to set the topology for the fields - PERIODICX = true; - PERIODICY = false; - PERIODICZ = true; - // here you have to set the topology for the Particles - PERIODICX_P = true; - PERIODICY_P = false; - PERIODICZ_P = true; - // ******************************************* - // ******************************************* - XDIR = 0; - YDIR = 1; - ZDIR = 2; - RIGHT = 1; - LEFT = -1; - - reorder = 1; - - divisions[0] = XLEN; - divisions[1] = YLEN; - divisions[2] = ZLEN; - - periods[0] = PERIODICX; - periods[1] = PERIODICY; - periods[2] = PERIODICZ; - - periods_P[0] = 
PERIODICX_P; - periods_P[1] = PERIODICY_P; - periods_P[2] = PERIODICZ_P; - - - cVERBOSE = false; // communication verbose ? - -} - - - - - -/** Within CART_COMM, processes find about their new rank numbers, their cartesian coordinates, - and their neighbors */ -inline void VCtopology3D::setup_vctopology(MPI_Comm old_comm) { - // create a matrix with ranks, and neighbours for fields - MPI_Cart_create(old_comm, 3, divisions, periods, reorder, &CART_COMM); - // create a matrix with ranks, and neighbours for Particles - MPI_Cart_create(old_comm, 3, divisions, periods_P, reorder, &CART_COMM_P); - // field Communicator - if (CART_COMM != MPI_COMM_NULL) { - MPI_Comm_rank(CART_COMM, &cartesian_rank); - MPI_Cart_coords(CART_COMM, cartesian_rank, 3, coordinates); - - MPI_Cart_shift(CART_COMM, XDIR, RIGHT, &xleft_neighbor, &xright_neighbor); - MPI_Cart_shift(CART_COMM, YDIR, RIGHT, &yleft_neighbor, &yright_neighbor); - MPI_Cart_shift(CART_COMM, ZDIR, RIGHT, &zleft_neighbor, &zright_neighbor); - } - else { - // EXCEPTION - cout << "A process is trown away from the new topology for fields. VCtopology3D.h" << endl; - } - // Particles Communicator - if (CART_COMM_P != MPI_COMM_NULL) { - MPI_Comm_rank(CART_COMM_P, &cartesian_rank); - MPI_Cart_coords(CART_COMM_P, cartesian_rank, 3, coordinates); - - MPI_Cart_shift(CART_COMM_P, XDIR, RIGHT, &xleft_neighbor_P, &xright_neighbor_P); - MPI_Cart_shift(CART_COMM_P, YDIR, RIGHT, &yleft_neighbor_P, &yright_neighbor_P); - MPI_Cart_shift(CART_COMM_P, ZDIR, RIGHT, &zleft_neighbor_P, &zright_neighbor_P); - } - else { - // EXCEPTION - cout << "A process is trown away from the new topology for Particles. 
VCtopology3D.h" << endl; - } - -} -/** destructor */ -inline VCtopology3D::~VCtopology3D() { - -} -/** print topology info */ -inline void VCtopology3D::Print() { - cout << endl; - cout << "Virtual Cartesian Processors Topology" << endl; - cout << "-------------------------------------" << endl; - cout << "Processors grid: " << XLEN << "x" << YLEN << "x" << ZLEN << endl; - cout << "Periodicity Field X: " << periods[0] << endl; - cout << "Periodicity Field Y: " << periods[1] << endl; - cout << "Periodicity Field z: " << periods[2] << endl; - cout << "Periodicity Particles X: " << periods_P[0] << endl; - cout << "Periodicity Particles Y: " << periods_P[1] << endl; - cout << "Periodicity Particles z: " << periods_P[2] << endl; - cout << endl; -} -/** print cartesian rank of neighbors and coordinate of process */ -inline void VCtopology3D::PrintMapping() { - cout << endl; - cout << "Mapping of process " << cartesian_rank << endl; - cout << "----------------------" << endl; - cout << "Coordinates: X = " << coordinates[0] << "; Y = " << coordinates[1] << "; Z = " << coordinates[2] << endl; - cout << "Neighbors: xLeft = " << xleft_neighbor << "; xRight = " << xright_neighbor << "; yLeft = " << yleft_neighbor << "; yRight = " << yright_neighbor << "; zLeft = " << zleft_neighbor << "; zRight = " << zright_neighbor << endl; - cout << endl; -} -/** get XLEN */ -inline int VCtopology3D::getXLEN() { - return (XLEN); -} -/** get YLEN */ -inline int VCtopology3D::getYLEN() { - return (YLEN); -} -/** get ZLEN */ -inline int VCtopology3D::getZLEN() { - return (ZLEN); -} -/** get nprocs */ -inline int VCtopology3D::getNprocs() { - return (nprocs); -} -/** get periodicity on boundaries - DIRECTION X*/ -inline bool VCtopology3D::getPERIODICX() { - return (PERIODICX); -} -/** get periodicity on boundaries - DIRECTION Y*/ -inline bool VCtopology3D::getPERIODICY() { - return (PERIODICY); -} -/** get periodicity on boundaries - DIRECTION Z*/ -inline bool VCtopology3D::getPERIODICZ() { - 
return (PERIODICZ); -} -/** get the cartesian rank of the process */ -inline int VCtopology3D::getCartesian_rank() { - return (cartesian_rank); -} -/** get the cartesian rank of XLEFT neighbor */ -inline int VCtopology3D::getXleft_neighbor() { - return (xleft_neighbor); -} -/** get the cartesian rank of XRIGHT neighbor */ -inline int VCtopology3D::getXright_neighbor() { - return (xright_neighbor); -} -/** get the cartesian rank of YLEFT neighbor */ -inline int VCtopology3D::getYleft_neighbor() { - return (yleft_neighbor); -} -/** get the cartesian rank of YRIGHT neighbor */ -inline int VCtopology3D::getYright_neighbor() { - return (yright_neighbor); -} -/** get the cartesian rank of ZLEFT neighbor */ -inline int VCtopology3D::getZleft_neighbor() { - return (zleft_neighbor); -} -/** get the cartesian rank of ZRIGHT neighbor */ -inline int VCtopology3D::getZright_neighbor() { - return (zright_neighbor); -} -/** get the cartesian rank of XLEFT neighbor */ -inline int VCtopology3D::getXleft_neighbor_P() { - return (xleft_neighbor_P); -} -/** get the cartesian rank of XRIGHT neighbor */ -inline int VCtopology3D::getXright_neighbor_P() { - return (xright_neighbor_P); -} -/** get the cartesian rank of YLEFT neighbor */ -inline int VCtopology3D::getYleft_neighbor_P() { - return (yleft_neighbor_P); -} -/** get the cartesian rank of YRIGHT neighbor */ -inline int VCtopology3D::getYright_neighbor_P() { - return (yright_neighbor_P); -} -/** get the cartesian rank of ZLEFT neighbor */ -inline int VCtopology3D::getZleft_neighbor_P() { - return (zleft_neighbor_P); -} -/** get the cartesian rank of ZRIGHT neighbor */ -inline int VCtopology3D::getZright_neighbor_P() { - return (zright_neighbor_P); -} -/** if cVERBOSE == true, print to the screen all the comunication */ -inline bool VCtopology3D::getcVERBOSE() { - return (cVERBOSE); -} -/** get the coordinates in dir direction of process*/ -inline int VCtopology3D::getCoordinates(int dir) { - return (coordinates[dir]); -} -/** get 
the coordinates in dir direction of process*/ -inline int *VCtopology3D::getCoordinates() { - return (coordinates); -} -/** get Periodicity condition in dir direction */ -inline int VCtopology3D::getPeriods(int dir) { - return (periods[dir]); -} -/** Get the MPI communicator */ -inline MPI_Comm VCtopology3D::getComm(){ - return (CART_COMM); -} - #endif From d68535c54b5c87eb575f5649b502873fe6348958 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 26 Sep 2013 17:21:57 +0200 Subject: [PATCH 036/118] issue #48: executable is now created as exec/iPic3D in the build directory. --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc94308a..c6b9473e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ project(iPic3D) # Set exec path # -set(EXECUTABLE_OUTPUT_PATH work/${CMAKE_BUILD_TYPE}) +set(EXECUTABLE_OUTPUT_PATH exec/${CMAKE_BUILD_TYPE}) set(LIBRARY_OUTPUT_PATH lib) # @@ -147,7 +147,7 @@ target_link_libraries( ## to save the executable in the folder where the CMakeLists.txt file is, i.e. 
CMAKE_CURRENT_SOURCE_DIR -set_target_properties(iPic3D PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +#set_target_properties(iPic3D PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) ## debug releases have a _d appended to the executable set_target_properties(iPic3D PROPERTIES DEBUG_POSTFIX "_d") From 46fd165ab06c80ee3826e65122a3b83df4604f22 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 1 Oct 2013 16:13:52 +0200 Subject: [PATCH 037/118] issue #49: create ipic scripts system: ctags and help --- scripts/ipic | 16 ++++++++++++++++ scripts/ipic-ctags | 8 ++++++++ scripts/ipic-help | 37 +++++++++++++++++++++++++++++++++++++ scripts/ipic-help-ctags | 22 ++++++++++++++++++++++ scripts/ipic-help-mic | 39 +++++++++++++++++++++++++++++++++++++++ scripts/makefiletags | 14 ++++++++++++++ scripts/tags | 7 +++++++ 7 files changed, 143 insertions(+) create mode 100755 scripts/ipic create mode 100755 scripts/ipic-ctags create mode 100755 scripts/ipic-help create mode 100755 scripts/ipic-help-ctags create mode 100755 scripts/ipic-help-mic create mode 100755 scripts/makefiletags create mode 100644 scripts/tags diff --git a/scripts/ipic b/scripts/ipic new file mode 100755 index 00000000..25158819 --- /dev/null +++ b/scripts/ipic @@ -0,0 +1,16 @@ +#!/bin/sh +if test $# -lt 1 +then + echo ' + usage: ipic + + Available ipic commands: + ipic ctags + ipic help +' + exit +fi +DIRNAME=`dirname $0` +APPENDIX="$1" +shift +exec "${DIRNAME}/ipic-${APPENDIX}" "$@" diff --git a/scripts/ipic-ctags b/scripts/ipic-ctags new file mode 100755 index 00000000..df4767be --- /dev/null +++ b/scripts/ipic-ctags @@ -0,0 +1,8 @@ +#!/bin/sh +DIRNAME=`dirname $0` +echo creating tags file using ctags +find . -name '*.cpp' -or -name '*.h' | xargs ctags --extra=+q +echo creating tag for each C++ file +find . 
-name '*.cpp' -or -name '*.h' | xargs $DIRNAME/makefiletags >> tags +echo sorting tags file +LC_ALL=C sort -u tags -o tags diff --git a/scripts/ipic-help b/scripts/ipic-help new file mode 100755 index 00000000..5dc5cf41 --- /dev/null +++ b/scripts/ipic-help @@ -0,0 +1,37 @@ +#!/bin/sh +if test $# -lt 1 +then + echo ' + To build, in the iPic3D directory you can use: + + rm -rf build # if necessary + mkdir build + cd build + cmake .. + make # or "make -j" to compile in parallel + + Before you build, you should first configure the number of MPI + processes you will use. To do so, you currently have to edit + "communication/VCtopology3D.cpp" (and then recompile in the build + directory). The lines you need to change are: + + XLEN = 2; + YLEN = 2; + ZLEN = 1; + + To run the code you can use + + mkdir data + mpiexec.hydra -n 4 -env OMP_NUM_THREADS=1 exec/iPic3D ../inputfiles/GEM.inp + + where 4 = XLEN times YLEN times ZLEN. + + Available subcommands: + ipic help mic +' + exit +fi +DIRNAME=`dirname $0` +APPENDIX="$1" +shift +exec "${DIRNAME}/ipic-help-${APPENDIX}" "$@" diff --git a/scripts/ipic-help-ctags b/scripts/ipic-help-ctags new file mode 100755 index 00000000..e80c00c1 --- /dev/null +++ b/scripts/ipic-help-ctags @@ -0,0 +1,22 @@ +#!/bin/sh +DIRNAME=`dirname $0` +SCRIPTSDIRNAME=`cd "${DIRNAME}"; pwd` +PARENTOFSCRIPTSDIRNAME=`dirname "${SCRIPTSDIRNAME}"` +#if test $# -lt 1 +#then + echo ' + Make sure that you are in the source code directory via e.g. + + cd '"${PARENTOFSCRIPTSDIRNAME}"' + + and then run the script, e.g. via + + '"${SCRIPTSDIRNAME}"'/ipic ctags + + or + + ipic ctags + + if you have '"${DIRNAME}"' in your path. 
+' + diff --git a/scripts/ipic-help-mic b/scripts/ipic-help-mic new file mode 100755 index 00000000..ecb81bce --- /dev/null +++ b/scripts/ipic-help-mic @@ -0,0 +1,39 @@ +#!/bin/sh +#if test $# -lt 1 +#then + echo ' + For the Xeon, you might want to change this to: + + XLEN = 4; + YLEN = 2; + ZLEN = 1; + + For the Xeon Phi, you might want: + + XLEN = 10; + YLEN = 5; + ZLEN = 1; + + Then to run the code you would use something like: + + mkdir data + mpiexec.hydra -n 8 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp + + where 8 = XLEN times YLEN times ZLEN. + + If you want to cross-compile for the MIC, then the instructions are + different: + + mkdir build.phi + cd build.phi + cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.XeonPhi + make -j + + And to run you use, e.g.: + + mkdir data + mpiexec.hydra -host knc2-mic0 -n 50 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp + + where 50 = XLEN times YLEN times ZLEN. +' + diff --git a/scripts/makefiletags b/scripts/makefiletags new file mode 100755 index 00000000..da0fb7d1 --- /dev/null +++ b/scripts/makefiletags @@ -0,0 +1,14 @@ +#!/bin/sh +# generate a tag for each file name argument +ls $* 2>&1| sed 's/ /\ +/g' \ +| perl -ne ' + if(m@/@) { + m@(.*?)/([^/\n]+)$@; + print "$2\t$1/$2\t1\n"; + } else { + m@(.*)@; + print "$1\t$1\t1\n"; + } + ' +echo "tags tags 1" >> tags diff --git a/scripts/tags b/scripts/tags new file mode 100644 index 00000000..9f0219d6 --- /dev/null +++ b/scripts/tags @@ -0,0 +1,7 @@ +ipic ipic 1 +ipic-ctags ipic-ctags 1 +ipic-help ipic-help 1 +ipic-help-ctags ipic-help-ctags 1 +ipic-help-mic ipic-help-mic 1 +makefiletags makefiletags 1 +tags tags 1 From e3d1efd86d339fa9a017c87e08ff8e0e7d00cbb3 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 1 Oct 2013 23:33:04 +0200 Subject: [PATCH 038/118] Removing "siamo qua" comment. 
--- ConfigFile/src/ConfigFile.h | 1 - include/ConfigFile.h | 1 - 2 files changed, 2 deletions(-) diff --git a/ConfigFile/src/ConfigFile.h b/ConfigFile/src/ConfigFile.h index d8d8108a..02c89ea4 100644 --- a/ConfigFile/src/ConfigFile.h +++ b/ConfigFile/src/ConfigFile.h @@ -166,7 +166,6 @@ template <> inline bool ConfigFile::string_as_T < bool > (const string & s) { *p = toupper(*p); // make string all caps if (sup == string("FALSE") || sup == string("F") || sup == string("NO") || sup == string("N") || sup == string("0") || sup == string("NONE")) { b = false; - cout << "siamo qua " << endl; } return b; } diff --git a/include/ConfigFile.h b/include/ConfigFile.h index 79008be2..62f2d2db 100644 --- a/include/ConfigFile.h +++ b/include/ConfigFile.h @@ -166,7 +166,6 @@ template <> inline bool ConfigFile::string_as_T < bool > (const string & s) { *p = toupper(*p); // make string all caps if (sup == string("FALSE") || sup == string("F") || sup == string("NO") || sup == string("N") || sup == string("0") || sup == string("NONE")) { b = false; - cout << "siamo qua " << endl; } return b; } From 2ac8e7644b3b375e20a2711bd616b53782968e31 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 1 Oct 2013 23:52:14 +0200 Subject: [PATCH 039/118] iss #47: put XLEN and PERIODICX in GEM.inp; iss #50: CollectiveIO removed --- communication/VCtopology3D.cpp | 21 +- include/Collective.h | 283 ++++++++++--------------- include/CollectiveIO.h | 367 +++++++++++++++++---------------- include/VCtopology3D.h | 3 +- inputfiles/GEM.inp | 14 +- inputfiles/Random.inp | 10 +- inputoutput/Collective.cpp | 319 +--------------------------- main/iPic3Dlib.cpp | 2 +- 8 files changed, 335 insertions(+), 684 deletions(-) diff --git a/communication/VCtopology3D.cpp b/communication/VCtopology3D.cpp index 5d0da864..b88d0878 100644 --- a/communication/VCtopology3D.cpp +++ b/communication/VCtopology3D.cpp @@ -1,5 +1,6 @@ #include "mpi.h" #include "Alloc.h" +#include "Collective.h" #include "VCtopology3D.h" #include 
@@ -7,22 +8,22 @@ using std::cout; using std::endl; /** DEFINE THE Topology HERE, setting XLEN,YLEN,ZLEN */ -VCtopology3D::VCtopology3D() { +VCtopology3D::VCtopology3D(const Collective& col) { // ******************************************* // ******************************************* // change these values to change the topology - XLEN = 2; - YLEN = 2; - ZLEN = 1; + XLEN = col.getXLEN(); + YLEN = col.getYLEN(); + ZLEN = col.getZLEN(); nprocs = XLEN * YLEN * ZLEN; // here you have to set the topology for the fields - PERIODICX = true; - PERIODICY = false; - PERIODICZ = true; + PERIODICX = col.getPERIODICX(); + PERIODICY = col.getPERIODICY(); + PERIODICZ = col.getPERIODICZ(); // here you have to set the topology for the Particles - PERIODICX_P = true; - PERIODICY_P = false; - PERIODICZ_P = true; + PERIODICX_P = col.getPERIODICX(); + PERIODICY_P = col.getPERIODICY(); + PERIODICZ_P = col.getPERIODICZ(); // ******************************************* // ******************************************* XDIR = 0; diff --git a/include/Collective.h b/include/Collective.h index 7126522d..e4155bcf 100644 --- a/include/Collective.h +++ b/include/Collective.h @@ -8,25 +8,33 @@ #ifndef Collective_H #define Collective_H +#ifdef BATSRUS +#include "InterfaceFluid.h" +#endif #include -#include -#include +//#include +//#include #include #include #include #include "ConfigFile.h" #include "input_array.h" #include "hdf5.h" -#include "CollectiveIO.h" +//#include "CollectiveIO.h" +using namespace std; using std::cout; using std::endl; using std::ofstream; using namespace std; -class Collective:public CollectiveIO { +class Collective +#ifdef BATSRUS +: public InterfaceFluid +#endif +{ public: /*! constructor: initialize physical parameters with values */ Collective(int argc, char **argv); @@ -40,174 +48,96 @@ class Collective:public CollectiveIO { void Print(); /*! save setting in a file */ void save(); - /*! get the physical space dimensions */ - int getDim(); - /*! 
Get length of the system - direction X */ - double getLx(); - /*! Get length of the system - direction Y */ - double getLy(); - /*! Get length of the system - direction Z */ - double getLz(); - /*! Get object center - direction X */ - double getx_center(); - /*! Get object center - direction Y */ - double gety_center(); - /*! Get object center - direction Z */ - double getz_center(); - /*! Get object size - cubic box */ - double getL_square(); - /*! Get the number of cells - direction X */ - int getNxc(); - /*! Get the number of cells - direction Y */ - int getNyc(); - /*! Get the number of cells - direction Z */ - int getNzc(); - /*! Get the grid spacing - direction X */ - double getDx(); - /*! Get the grid spacing - direction Y */ - double getDy(); - /*! Get the grid spacing - direction Z */ - double getDz(); - /*! get the light speed */ - double getC(); - /*! get the time step */ - double getDt(); - /*! get the decentering parameter */ - double getTh(); - /*! get the Smoothing value */ - double getSmooth(); - /*! get the number of time cycles */ - int getNcycles(); - /*! get the number of species */ - int getNs(); - /*! get the number of particles for different species */ - long getNp(int nspecies); - /*! get the number of particles per cell */ - int getNpcel(int nspecies); - /*! get the number of particles per cell - direction X */ - int getNpcelx(int nspecies); - /*! get the number of particles per cell - direction Y */ - int getNpcely(int nspecies); - /*! get the number of particles per cell - direction Z */ - int getNpcelz(int nspecies); - /*! get maximum number of particles for different species */ - long getNpMax(int nspecies); - /*! NpMax/Np is the ratio between the maximum number of particles allowed on a processor and the number of particles */ - double getNpMaxNpRatio(); - /*! get charge to mass ratio for different species */ - double getQOM(int nspecies); - /*! get background charge for GEM challenge */ - double getRHOinit(int nspecies); - /*! 
get rho injection */ - double getRHOinject(int nspecies); - /*! get thermal velocity - X direction */ - double getUth(int nspecies); - /*! get thermal velocity - Y direction */ - double getVth(int nspecies); - /*! get thermal velocity - Z direction */ - double getWth(int nspecies); - /*! get Drift velocity - Direction X */ - double getU0(int nspecies); - /*! get Drift velocity - Direction Y */ - double getV0(int nspecies); - /*! get Drift velocity - Direction Z */ - double getW0(int nspecies); - /*! get the boolean value for TrackParticleID */ - bool getTrackParticleID(int nspecies); - /*! get SaveDirName */ - string getSaveDirName(); - /*! get last_cycle */ - int getLast_cycle(); - /*! get RestartDirName */ - string getRestartDirName(); - - /*! get Case type */ - string getCase(); - /*! get output writing method */ - string getWriteMethod(); - /*! get simulation name */ - string getSimName(); - /*! get Poisson correction flag */ - string getPoissonCorrection(); - - /*! get Boundary Condition Particles: FaceXright */ - int getBcPfaceXright(); - /*! get Boundary Condition Particles: FaceXleft */ - int getBcPfaceXleft(); - /*! get Boundary Condition Particles: FaceYright */ - int getBcPfaceYright(); - /*! get Boundary Condition Particles: FaceYleft */ - int getBcPfaceYleft(); - /*! get Boundary Condition Particles: FaceYright */ - int getBcPfaceZright(); - /*! get Boundary Condition Particles: FaceYleft */ - int getBcPfaceZleft(); - - /*! get Boundary Condition Electrostatic Potential: FaceXright */ - int getBcPHIfaceXright(); - /*! get Boundary Condition Electrostatic Potential:FaceXleft */ - int getBcPHIfaceXleft(); - /*! get Boundary Condition Electrostatic Potential:FaceYright */ - int getBcPHIfaceYright(); - /*! get Boundary Condition Electrostatic Potential:FaceYleft */ - int getBcPHIfaceYleft(); - /*! get Boundary Condition Electrostatic Potential:FaceYright */ - int getBcPHIfaceZright(); - /*! 
get Boundary Condition Electrostatic Potential:FaceYleft */ - int getBcPHIfaceZleft(); - - /*! get Boundary ConditionElectric Field: FaceXright */ - int getBcEMfaceXright(); - /*! get Boundary Condition Electric Field: FaceXleft */ - int getBcEMfaceXleft(); - /*! get Boundary Condition Electric Field: FaceYright */ - int getBcEMfaceYright(); - /*! get Boundary Condition Electric Field: FaceYleft */ - int getBcEMfaceYleft(); - /*! get Boundary Condition Electric Field: FaceZright */ - int getBcEMfaceZright(); - /*! get Boundary Condition Electric Field: FaceZleft */ - int getBcEMfaceZleft(); - - /*! get RESTART */ - int getRestart_status(); - - /*! get the sheet thickness */ - double getDelta(); - /*! get the amplitude of the magnetic field along x */ - double getB0x(); - /*! get the amplitude of the magnetic field along y */ - double getB0y(); - /*! get the amplitude of the magnetic field along z */ - double getB0z(); - /*! get the amplitude of the magnetic field 1 along x */ - double getB1x(); - /*! get the amplitude of the magnetic field 1 along y */ - double getB1y(); - /*! get the amplitude of the magnetic field 1 along z */ - double getB1z(); - - /*! get the boolean value for verbose results */ - bool getVerbose(); - - /*! get the velocity of injection of the plasma from the wall */ - double getVinj(); - - /*! get the converging tolerance for CG solver */ - double getCGtol(); - /*! get the converging tolerance for GMRES solver */ - double getGMREStol(); - /*! get the numbers of iteration for the PC mover */ - int getNiterMover(); - /*! output of fields */ - int getFieldOutputCycle(); - /*! output of particles */ - int getParticlesOutputCycle(); - /*! output of restart */ - int getRestartOutputCycle(); - /*! 
output of diagnostics */ - int getDiagnosticsOutputCycle(); + // accessors + // + int getDim()const{ return (dim); } + double getLx()const{ return (Lx); } + double getLy()const{ return (Ly); } + double getLz()const{ return (Lz); } + double getx_center()const{ return (x_center); } + double gety_center()const{ return (y_center); } + double getz_center()const{ return (z_center); } + double getL_square()const{ return (L_square); } + int getNxc()const{ return (nxc); } + int getNyc()const{ return (nyc); } + int getNzc()const{ return (nzc); } + int getXLEN()const{ return (XLEN); } + int getYLEN()const{ return (YLEN); } + int getZLEN()const{ return (ZLEN); } + bool getPERIODICX()const{ return (PERIODICX); } + bool getPERIODICY()const{ return (PERIODICY); } + bool getPERIODICZ()const{ return (PERIODICZ); } + double getDx()const{ return (dx); } + double getDy()const{ return (dy); } + double getDz()const{ return (dz); } + double getC()const{ return (c); } + double getDt()const{ return (dt); } + double getTh()const{ return (th); } + double getSmooth()const{ return (Smooth); } + int getNcycles()const{ return (ncycles); } + int getNs()const{ return (ns); } + int getNpcel(int nspecies)const{ return (npcel[nspecies]); } + int getNpcelx(int nspecies)const{ return (npcelx[nspecies]); } + int getNpcely(int nspecies)const{ return (npcely[nspecies]); } + int getNpcelz(int nspecies)const{ return (npcelz[nspecies]); } + long getNp(int nspecies)const{ return (np[nspecies]); } + long getNpMax(int nspecies)const{ return (npMax[nspecies]); } + double getNpMaxNpRatio()const{ return (NpMaxNpRatio); } + double getQOM(int nspecies)const{ return (qom[nspecies]); } + double getRHOinit(int nspecies)const{ return (rhoINIT[nspecies]); } + double getRHOinject(int nspecies)const { return(rhoINJECT[nspecies]); } + double getUth(int nspecies)const{ return (uth[nspecies]); } + double getVth(int nspecies)const{ return (vth[nspecies]); } + double getWth(int nspecies)const{ return (wth[nspecies]); } + double 
getU0(int nspecies)const{ return (u0[nspecies]); } + double getV0(int nspecies)const{ return (v0[nspecies]); } + double getW0(int nspecies)const{ return (w0[nspecies]); } + int getBcPfaceXright()const{ return (bcPfaceXright); } + int getBcPfaceXleft()const{ return (bcPfaceXleft); } + int getBcPfaceYright()const{ return (bcPfaceYright); } + int getBcPfaceYleft()const{ return (bcPfaceYleft); } + int getBcPfaceZright()const{ return (bcPfaceZright); } + int getBcPfaceZleft()const{ return (bcPfaceZleft); } + int getBcPHIfaceXright()const{ return (bcPHIfaceXright); } + int getBcPHIfaceXleft()const{ return (bcPHIfaceXleft); } + int getBcPHIfaceYright()const{ return (bcPHIfaceYright); } + int getBcPHIfaceYleft()const{ return (bcPHIfaceYleft); } + int getBcPHIfaceZright()const{ return (bcPHIfaceZright); } + int getBcPHIfaceZleft()const{ return (bcPHIfaceZleft); } + int getBcEMfaceXright()const{ return (bcEMfaceXright); } + int getBcEMfaceXleft()const{ return (bcEMfaceXleft); } + int getBcEMfaceYright()const{ return (bcEMfaceYright); } + int getBcEMfaceYleft()const{ return (bcEMfaceYleft); } + int getBcEMfaceZright()const{ return (bcEMfaceZright); } + int getBcEMfaceZleft()const{ return (bcEMfaceZleft); } + double getDelta()const{ return (delta); } + double getB0x()const{ return (B0x); } + double getB0y()const{ return (B0y); } + double getB0z()const{ return (B0z); } + double getB1x()const{ return (B1x); } + double getB1y()const{ return (B1y); } + double getB1z()const{ return (B1z); } + bool getVerbose()const{ return (verbose); } + bool getTrackParticleID(int nspecies)const + { return (TrackParticleID[nspecies]); } + int getRestart_status()const{ return (restart_status); } + string getSaveDirName()const{ return (SaveDirName); } + string getRestartDirName()const{ return (RestartDirName); } + string getinputfile()const{ return (inputfile); } + string getCase()const{ return (Case); } + string getSimName()const{ return (SimName); } + string getWriteMethod()const{ return 
(wmethod); } + string getPoissonCorrection()const{ return (PoissonCorrection); } + int getLast_cycle()const{ return (last_cycle); } + double getVinj()const{ return (Vinj); } + double getCGtol()const{ return (CGtol); } + double getGMREStol()const{ return (GMREStol); } + int getNiterMover()const{ return (NiterMover); } + int getFieldOutputCycle()const{ return (FieldOutputCycle); } + int getParticlesOutputCycle()const{ return (ParticlesOutputCycle); } + int getRestartOutputCycle()const{ return (RestartOutputCycle); } + int getDiagnosticsOutputCycle()const{ return (DiagnosticsOutputCycle); } /*! Boundary condition selection for BCFace for the electric field components */ int bcEx[6], bcEy[6], bcEz[6]; @@ -257,6 +187,14 @@ class Collective:public CollectiveIO { double dy; /*! grid spacing - Z direction */ double dz; + /*! number of MPI subdomains in each direction */ + int XLEN; + int YLEN; + int ZLEN; + /*! periodicity in each direction */ + bool PERIODICX; + bool PERIODICY; + bool PERIODICZ; /*! number of species */ int ns; /*! number of particles per cell */ @@ -307,8 +245,6 @@ class Collective:public CollectiveIO { string SaveDirName; /*! RestartDirName */ string RestartDirName; - /*! get inputfile */ - string getinputfile(); /*! restart_status 0 --> no restart; 1--> restart, create new; 2--> restart, append; */ int restart_status; /*! last cycle */ @@ -393,5 +329,6 @@ class Collective:public CollectiveIO { /*! 
Output for diagnostics */ int DiagnosticsOutputCycle; }; +typedef Collective CollectiveIO; #endif diff --git a/include/CollectiveIO.h b/include/CollectiveIO.h index 2fd7ffe5..f208b5f6 100644 --- a/include/CollectiveIO.h +++ b/include/CollectiveIO.h @@ -8,16 +8,17 @@ developers : Stefano Markidis, Giovanni Lapenta #ifndef CollectiveIO_H #define CollectiveIO_H -#include -#include -#include -#include +#include "Collective.h" +//#include +//#include +//#include +//#include -#ifdef BATSRUS -#include "InterfaceFluid.h" -#endif +//#ifdef BATSRUS +//#include "InterfaceFluid.h" +//#endif -using namespace std; +//using namespace std; /** * Abstract base class for inputing physical parameters for simulation. * @@ -28,179 +29,179 @@ using namespace std; * @version 1.0 */ -#ifdef BATSRUS -class CollectiveIO : public InterfaceFluid{ -#else -class CollectiveIO { -#endif -public: - /** read input file */ - virtual void ReadInput(string inputfile) = 0; - /** read the restart input file from HDF5 */ - virtual int ReadRestart(string inputfile) = 0; - /** print simulation parameters */ - virtual void Print(void) = 0; - /** print simulation parameters */ - virtual void save(void) = 0; - /** get the physical space dimensions */ - virtual int getDim() = 0; - /** get simulation box length - direction X */ - virtual double getLx(void) = 0; - /** get simulation box length - direction Y */ - virtual double getLy(void) = 0; - /** get simulation box length - direction Z */ - virtual double getLz(void) = 0; - /** get object center - direction X */ - virtual double getx_center(void) = 0; - /** get object center - direction Y */ - virtual double gety_center(void) = 0; - /** get object center - direction Z */ - virtual double getz_center(void) = 0; - /** get object size - cubic box */ - virtual double getL_square(void) = 0; - /** get number of cells - direction X */ - virtual int getNxc(void) = 0; - /** get number of cells - direction Y */ - virtual int getNyc(void) = 0; - /** get number of cells 
- direction Z */ - virtual int getNzc(void) = 0; - /** get grid spacing - direction X */ - virtual double getDx(void) = 0; - /** get grid spacing - direction Y */ - virtual double getDy(void) = 0; - /** get grid spacing - direction z */ - virtual double getDz(void) = 0; - /** get the light speed */ - virtual double getC() = 0; - /** get the time step */ - virtual double getDt() = 0; - /** get the decentering parameter */ - virtual double getTh() = 0; - /** get the Smoothing value*/ - virtual double getSmooth() = 0; - /** get the number of time cycles */ - virtual int getNcycles() = 0; - /** get the number of species */ - virtual int getNs() = 0; - /** get the number of particles array for different species */ - virtual long getNp(int nspecies) = 0; - /** get the number of particles per cell */ - virtual int getNpcel(int nspecies) = 0; - /** get the number of particles per cell - direction X */ - virtual int getNpcelx(int nspecies) = 0; - /** get the number of particles per cell - direction Y */ - virtual int getNpcely(int nspecies) = 0; - /** get the number of particles per cell - direction Z */ - virtual int getNpcelz(int nspecies) = 0; - /** get maximum number of particles for different species */ - virtual long getNpMax(int nspecies) = 0; - /** NpMax/Np is the ratio between the maximum number of particles allowed on a processor and the number of particles*/ - virtual double getNpMaxNpRatio() = 0; - /** get charge to mass ratio for different species */ - virtual double getQOM(int nspecies) = 0; - /** get background charge for GEM challenge */ - virtual double getRHOinit(int nspecies) = 0; - /** get rho for injection */ - virtual double getRHOinject(int nspecies)=0; - /** get thermal velocity - X direction */ - virtual double getUth(int nspecies) = 0; - /** get thermal velocity - Y direction */ - virtual double getVth(int nspecies) = 0; - /** get thermal velocity - Z direction */ - virtual double getWth(int nspecies) = 0; - /** get Drift velocity - Direction X */ 
- virtual double getU0(int nspecies) = 0; - /** get Drift velocity - Direction Y */ - virtual double getV0(int nspecies) = 0; - /** get Drift velocity - Direction Z */ - virtual double getW0(int nspecies) = 0; - /** get the boolean value for TrackParticleID */ - virtual bool getTrackParticleID(int nspecies) = 0; - /** get SaveDirName */ - virtual string getSaveDirName() = 0; - /** get last_cycle */ - virtual int getLast_cycle() = 0; - /** get RestartDirName */ - virtual string getRestartDirName() = 0; - - /** get Case type */ - virtual string getCase() = 0; - /** get simulation name */ - virtual string getSimName() = 0; - /** get Poisson correction flag */ - virtual string getPoissonCorrection() = 0; - - /** get Boundary Condition Particles: FaceXright */ - virtual int getBcPfaceXright() = 0; - /** get Boundary Condition Particles: FaceXleft */ - virtual int getBcPfaceXleft() = 0; - /** get Boundary Condition Particles: FaceYright */ - virtual int getBcPfaceYright() = 0; - /** get Boundary Condition Particles: FaceYleft */ - virtual int getBcPfaceYleft() = 0; - /** get Boundary Condition Particles: FaceYright */ - virtual int getBcPfaceZright() = 0; - /** get Boundary Condition Particles: FaceYleft */ - virtual int getBcPfaceZleft() = 0; - - /** get Boundary Condition Electrostatic Potential: FaceXright */ - virtual int getBcPHIfaceXright() = 0; - /** get Boundary Condition Electrostatic Potential:FaceXleft */ - virtual int getBcPHIfaceXleft() = 0; - /** get Boundary Condition Electrostatic Potential:FaceYright */ - virtual int getBcPHIfaceYright() = 0; - /** get Boundary Condition Electrostatic Potential:FaceYleft */ - virtual int getBcPHIfaceYleft() = 0; - /** get Boundary Condition Electrostatic Potential:FaceYright */ - virtual int getBcPHIfaceZright() = 0; - /** get Boundary Condition Electrostatic Potential:FaceYleft */ - virtual int getBcPHIfaceZleft() = 0; - - /** get Boundary ConditionElectric Field: FaceXright */ - virtual int getBcEMfaceXright() = 0; - 
/** get Boundary Condition Electric Field: FaceXleft */ - virtual int getBcEMfaceXleft() = 0; - /** get Boundary Condition Electric Field: FaceYright */ - virtual int getBcEMfaceYright() = 0; - /** get Boundary Condition Electric Field: FaceYleft */ - virtual int getBcEMfaceYleft() = 0; - /** get Boundary Condition Electric Field: FaceYright */ - virtual int getBcEMfaceZright() = 0; - /** get Boundary Condition Electric Field: FaceYleft */ - virtual int getBcEMfaceZleft() = 0; - - /** get RESTART */ - virtual int getRestart_status() = 0; - - - /** Get GEM Challenge parameters */ - - virtual double getDelta() = 0; - virtual double getB0x() = 0; - virtual double getB0y() = 0; - virtual double getB0z() = 0; - - /** get the boolean value for verbose results */ - virtual bool getVerbose() = 0; - - /** get the converging tolerance for CG solver */ - virtual double getCGtol() = 0; - /** get the converging tolerance for GMRES solver */ - virtual double getGMREStol() = 0; - /** get the numbers of iteration for the PC mover */ - virtual int getNiterMover() = 0; - - /** output of fields */ - virtual int getFieldOutputCycle() = 0; - /** output of fields */ - virtual int getParticlesOutputCycle() = 0; - /** output of fields */ - virtual int getRestartOutputCycle() = 0; - /** output of fields */ - virtual int getDiagnosticsOutputCycle() = 0; - - /** get the velocity of injection of the plasma from the wall */ - virtual double getVinj() = 0; - -}; +//#ifdef BATSRUS +//class CollectiveIO : public InterfaceFluid{ +//#else +//class CollectiveIO { +//#endif +//public: +// /** read input file */ +// virtual void ReadInput(string inputfile) = 0; +// /** read the restart input file from HDF5 */ +// virtual int ReadRestart(string inputfile) = 0; +// /** print simulation parameters */ +// virtual void Print(void) = 0; +// /** print simulation parameters */ +// virtual void save(void) = 0; +// /** get the physical space dimensions */ +// virtual int getDim() = 0; +// /** get simulation box 
length - direction X */ +// virtual double getLx(void) = 0; +// /** get simulation box length - direction Y */ +// virtual double getLy(void) = 0; +// /** get simulation box length - direction Z */ +// virtual double getLz(void) = 0; +// /** get object center - direction X */ +// virtual double getx_center(void) = 0; +// /** get object center - direction Y */ +// virtual double gety_center(void) = 0; +// /** get object center - direction Z */ +// virtual double getz_center(void) = 0; +// /** get object size - cubic box */ +// virtual double getL_square(void) = 0; +// /** get number of cells - direction X */ +// virtual int getNxc(void) = 0; +// /** get number of cells - direction Y */ +// virtual int getNyc(void) = 0; +// /** get number of cells - direction Z */ +// virtual int getNzc(void) = 0; +// /** get grid spacing - direction X */ +// virtual double getDx(void) = 0; +// /** get grid spacing - direction Y */ +// virtual double getDy(void) = 0; +// /** get grid spacing - direction z */ +// virtual double getDz(void) = 0; +// /** get the light speed */ +// virtual double getC() = 0; +// /** get the time step */ +// virtual double getDt() = 0; +// /** get the decentering parameter */ +// virtual double getTh() = 0; +// /** get the Smoothing value*/ +// virtual double getSmooth() = 0; +// /** get the number of time cycles */ +// virtual int getNcycles() = 0; +// /** get the number of species */ +// virtual int getNs() = 0; +// /** get the number of particles array for different species */ +// virtual long getNp(int nspecies) = 0; +// /** get the number of particles per cell */ +// virtual int getNpcel(int nspecies) = 0; +// /** get the number of particles per cell - direction X */ +// virtual int getNpcelx(int nspecies) = 0; +// /** get the number of particles per cell - direction Y */ +// virtual int getNpcely(int nspecies) = 0; +// /** get the number of particles per cell - direction Z */ +// virtual int getNpcelz(int nspecies) = 0; +// /** get maximum number of 
particles for different species */ +// virtual long getNpMax(int nspecies) = 0; +// /** NpMax/Np is the ratio between the maximum number of particles allowed on a processor and the number of particles*/ +// virtual double getNpMaxNpRatio() = 0; +// /** get charge to mass ratio for different species */ +// virtual double getQOM(int nspecies) = 0; +// /** get background charge for GEM challenge */ +// virtual double getRHOinit(int nspecies) = 0; +// /** get rho for injection */ +// virtual double getRHOinject(int nspecies)=0; +// /** get thermal velocity - X direction */ +// virtual double getUth(int nspecies) = 0; +// /** get thermal velocity - Y direction */ +// virtual double getVth(int nspecies) = 0; +// /** get thermal velocity - Z direction */ +// virtual double getWth(int nspecies) = 0; +// /** get Drift velocity - Direction X */ +// virtual double getU0(int nspecies) = 0; +// /** get Drift velocity - Direction Y */ +// virtual double getV0(int nspecies) = 0; +// /** get Drift velocity - Direction Z */ +// virtual double getW0(int nspecies) = 0; +// /** get the boolean value for TrackParticleID */ +// virtual bool getTrackParticleID(int nspecies) = 0; +// /** get SaveDirName */ +// virtual string getSaveDirName() = 0; +// /** get last_cycle */ +// virtual int getLast_cycle() = 0; +// /** get RestartDirName */ +// virtual string getRestartDirName() = 0; +// +// /** get Case type */ +// virtual string getCase() = 0; +// /** get simulation name */ +// virtual string getSimName() = 0; +// /** get Poisson correction flag */ +// virtual string getPoissonCorrection() = 0; +// +// /** get Boundary Condition Particles: FaceXright */ +// virtual int getBcPfaceXright() = 0; +// /** get Boundary Condition Particles: FaceXleft */ +// virtual int getBcPfaceXleft() = 0; +// /** get Boundary Condition Particles: FaceYright */ +// virtual int getBcPfaceYright() = 0; +// /** get Boundary Condition Particles: FaceYleft */ +// virtual int getBcPfaceYleft() = 0; +// /** get 
Boundary Condition Particles: FaceYright */ +// virtual int getBcPfaceZright() = 0; +// /** get Boundary Condition Particles: FaceYleft */ +// virtual int getBcPfaceZleft() = 0; +// +// /** get Boundary Condition Electrostatic Potential: FaceXright */ +// virtual int getBcPHIfaceXright() = 0; +// /** get Boundary Condition Electrostatic Potential:FaceXleft */ +// virtual int getBcPHIfaceXleft() = 0; +// /** get Boundary Condition Electrostatic Potential:FaceYright */ +// virtual int getBcPHIfaceYright() = 0; +// /** get Boundary Condition Electrostatic Potential:FaceYleft */ +// virtual int getBcPHIfaceYleft() = 0; +// /** get Boundary Condition Electrostatic Potential:FaceYright */ +// virtual int getBcPHIfaceZright() = 0; +// /** get Boundary Condition Electrostatic Potential:FaceYleft */ +// virtual int getBcPHIfaceZleft() = 0; +// +// /** get Boundary ConditionElectric Field: FaceXright */ +// virtual int getBcEMfaceXright() = 0; +// /** get Boundary Condition Electric Field: FaceXleft */ +// virtual int getBcEMfaceXleft() = 0; +// /** get Boundary Condition Electric Field: FaceYright */ +// virtual int getBcEMfaceYright() = 0; +// /** get Boundary Condition Electric Field: FaceYleft */ +// virtual int getBcEMfaceYleft() = 0; +// /** get Boundary Condition Electric Field: FaceYright */ +// virtual int getBcEMfaceZright() = 0; +// /** get Boundary Condition Electric Field: FaceYleft */ +// virtual int getBcEMfaceZleft() = 0; +// +// /** get RESTART */ +// virtual int getRestart_status() = 0; +// +// +// /** Get GEM Challenge parameters */ +// +// virtual double getDelta() = 0; +// virtual double getB0x() = 0; +// virtual double getB0y() = 0; +// virtual double getB0z() = 0; +// +// /** get the boolean value for verbose results */ +// virtual bool getVerbose() = 0; +// +// /** get the converging tolerance for CG solver */ +// virtual double getCGtol() = 0; +// /** get the converging tolerance for GMRES solver */ +// virtual double getGMREStol() = 0; +// /** get 
the numbers of iteration for the PC mover */ +// virtual int getNiterMover() = 0; +// +// /** output of fields */ +// virtual int getFieldOutputCycle() = 0; +// /** output of fields */ +// virtual int getParticlesOutputCycle() = 0; +// /** output of fields */ +// virtual int getRestartOutputCycle() = 0; +// /** output of fields */ +// virtual int getDiagnosticsOutputCycle() = 0; +// +// /** get the velocity of injection of the plasma from the wall */ +// virtual double getVinj() = 0; +// +//}; #endif diff --git a/include/VCtopology3D.h b/include/VCtopology3D.h index 2bab31ed..97f28788 100644 --- a/include/VCtopology3D.h +++ b/include/VCtopology3D.h @@ -27,11 +27,12 @@ developers : Stefano Markidis, Giovanni Lapenta * @version 2.0 */ +class Collective; class VCtopology3D:public VirtualTopology3D { public: /** constructor: Define topology parameters: dimension, domain decomposition,... */ - VCtopology3D(); + VCtopology3D(const Collective& col); /** destructor */ ~VCtopology3D(); /** Find the neighbors in the new communicator */ diff --git a/inputfiles/GEM.inp b/inputfiles/GEM.inp index ba6173c2..e6f9bc23 100644 --- a/inputfiles/GEM.inp +++ b/inputfiles/GEM.inp @@ -47,10 +47,20 @@ y_center = 1. # Ly = simulation box length - y direction in m z_center = 1. 
# Lz = simulation box length - z direction in m L_square = .1 -nxc = 128 # nxc = number of cells - x direction -nyc = 128 # nyc = number of cells - y direction +nxc = 120 # nxc = number of cells - x direction +nyc = 120 # nyc = number of cells - y direction nzc = 1 # nzc = number of cells - z direction +# %%%%%%%%%%%%%% MPI TOPOLOGY %%%%%%%%%%%%%% +# number of MPI subdomains in each direction +XLEN = 4 +YLEN = 4 +ZLEN = 1 +# topology of subdomains in each dimension (1=true, 0=false) +PERIODICX = 1 +PERIODICY = 0 +PERIODICZ = 1 + # %%%%%%%%%%%%%% PARTICLES %%%%%%%%%%%%%%%%% # ns = number of species # 0 = electrons diff --git a/inputfiles/Random.inp b/inputfiles/Random.inp index b48ef3a4..b2ee56fe 100644 --- a/inputfiles/Random.inp +++ b/inputfiles/Random.inp @@ -49,7 +49,15 @@ L_square = .1 nxc = 120 # nxc = number of cells - x direction nyc = 120 # nyc = number of cells - y direction nzc = 1 # nzc = number of cells - z direction - +# %%%%%%%%%%%%%% MPI TOPOLOGY %%%%%%%%%%%%%% +# number of MPI subdomains in each direction +XLEN = 2 +YLEN = 2 +ZLEN = 1 +# topology of subdomains in each dimension (1=true, 0=false) +PERIODICX = 1 +PERIODICY = 1 +PERIODICZ = 1 # %%%%%%%%%%%%%% PARTICLES %%%%%%%%%%%%%%%%% # ns = number of species # 0 = electrons diff --git a/inputoutput/Collective.cpp b/inputoutput/Collective.cpp index 140e1647..da721b76 100644 --- a/inputoutput/Collective.cpp +++ b/inputoutput/Collective.cpp @@ -115,6 +115,12 @@ void Collective::ReadInput(string inputfile) { nyc = config.read < int >("nyc"); nzc = config.read < int >("nzc"); #endif + XLEN = config.read < int >("XLEN",1); + YLEN = config.read < int >("YLEN",1); + ZLEN = config.read < int >("ZLEN",1); + PERIODICX = config.read < bool >("PERIODICX"); + PERIODICY = config.read < bool >("PERIODICY"); + PERIODICZ = config.read < bool >("PERIODICZ"); x_center = config.read < double >("x_center"); y_center = config.read < double >("y_center"); @@ -707,316 +713,3 @@ void Collective::save() { } -/*! 
get the physical space dimensions */ -int Collective::getDim() { - return (dim); -} -/*! get Lx */ -double Collective::getLx() { - return (Lx); -} -/*! get Ly */ -double Collective::getLy() { - return (Ly); -} -/*! get Lz */ -double Collective::getLz() { - return (Lz); -} -/*! get x_center */ -double Collective::getx_center() { - return (x_center); -} -/*! get y_center */ -double Collective::gety_center() { - return (y_center); -} -/*! get z_center */ -double Collective::getz_center() { - return (z_center); -} -/*! get L_square */ -double Collective::getL_square() { - return (L_square); -} -/*! get nxc */ -int Collective::getNxc() { - return (nxc); -} -/*! get nyx */ -int Collective::getNyc() { - return (nyc); -} -/*! get nzc */ -int Collective::getNzc() { - return (nzc); -} -/*! get dx */ -double Collective::getDx() { - return (dx); -} -/*! get dy */ -double Collective::getDy() { - return (dy); -} -/*! get dz */ -double Collective::getDz() { - return (dz); -} -/*! get the light speed */ -double Collective::getC() { - return (c); -} -/*! get the time step */ -double Collective::getDt() { - return (dt); -} -/*! get the decentering parameter */ -double Collective::getTh() { - return (th); -} -/*! get the smooth parameter */ -double Collective::getSmooth() { - return (Smooth); -} - -/*! get the number of time cycles */ -int Collective::getNcycles() { - return (ncycles); -} -/*! get the number of species */ -int Collective::getNs() { - return (ns); -} -/*! get the number of particles per cell for species nspecies */ -int Collective::getNpcel(int nspecies) { - return (npcel[nspecies]); -} -/*! get the number of particles per cell for species nspecies - direction X */ -int Collective::getNpcelx(int nspecies) { - return (npcelx[nspecies]); -} -/*! get the number of particles per cell for species nspecies - direction Y */ -int Collective::getNpcely(int nspecies) { - return (npcely[nspecies]); -} -/*! 
get the number of particles per cell for species nspecies - direction Z */ -int Collective::getNpcelz(int nspecies) { - return (npcelz[nspecies]); -} -/*! get the number of particles for different species */ -long Collective::getNp(int nspecies) { - return (np[nspecies]); -} -/*! get maximum number of particles for different species */ -long Collective::getNpMax(int nspecies) { - return (npMax[nspecies]); -} -double Collective::getNpMaxNpRatio() { - return (NpMaxNpRatio); -} -/*! get charge to mass ratio for different species */ -double Collective::getQOM(int nspecies) { - return (qom[nspecies]); -} -/*! get the background density for GEM challenge */ -double Collective::getRHOinit(int nspecies) { - return (rhoINIT[nspecies]); -} -/*! get the background density for GEM challenge */ -inline double Collective::getRHOinject(int nspecies){ - return(rhoINJECT[nspecies]); -} -/*! get thermal velocity - Direction X */ -double Collective::getUth(int nspecies) { - return (uth[nspecies]); -} -/*! get thermal velocity - Direction Y */ -double Collective::getVth(int nspecies) { - return (vth[nspecies]); -} -/*! get thermal velocity - Direction Z */ -double Collective::getWth(int nspecies) { - return (wth[nspecies]); -} -/*! get beam velocity - Direction X */ -double Collective::getU0(int nspecies) { - return (u0[nspecies]); -} -/*! get beam velocity - Direction Y */ -double Collective::getV0(int nspecies) { - return (v0[nspecies]); -} -/*! get beam velocity - Direction Z */ -double Collective::getW0(int nspecies) { - return (w0[nspecies]); -} -/*! get Boundary Condition Particles: FaceXright */ -int Collective::getBcPfaceXright() { - return (bcPfaceXright); -} -/*! get Boundary Condition Particles: FaceXleft */ -int Collective::getBcPfaceXleft() { - return (bcPfaceXleft); -} -/*! get Boundary Condition Particles: FaceYright */ -int Collective::getBcPfaceYright() { - return (bcPfaceYright); -} -/*! 
get Boundary Condition Particles: FaceYleft */ -int Collective::getBcPfaceYleft() { - return (bcPfaceYleft); -} -/*! get Boundary Condition Particles: FaceZright */ -int Collective::getBcPfaceZright() { - return (bcPfaceZright); -} -/*! get Boundary Condition Particles: FaceZleft */ -int Collective::getBcPfaceZleft() { - return (bcPfaceZleft); -} -/*! get Boundary Condition Electrostatic Potential: FaceXright */ -int Collective::getBcPHIfaceXright() { - return (bcPHIfaceXright); -} -/*! get Boundary Condition Electrostatic Potential:FaceXleft */ -int Collective::getBcPHIfaceXleft() { - return (bcPHIfaceXleft); -} -/*! get Boundary Condition Electrostatic Potential:FaceYright */ -int Collective::getBcPHIfaceYright() { - return (bcPHIfaceYright); -} -/*! get Boundary Condition Electrostatic Potential:FaceYleft */ -int Collective::getBcPHIfaceYleft() { - return (bcPHIfaceYleft); -} -/*! get Boundary Condition Electrostatic Potential:FaceZright */ -int Collective::getBcPHIfaceZright() { - return (bcPHIfaceZright); -} -/*! get Boundary Condition Electrostatic Potential:FaceZleft */ -int Collective::getBcPHIfaceZleft() { - return (bcPHIfaceZleft); -} -/*! get Boundary Condition EM Field: FaceXright */ -int Collective::getBcEMfaceXright() { - return (bcEMfaceXright); -} -/*! get Boundary Condition EM Field: FaceXleft */ -int Collective::getBcEMfaceXleft() { - return (bcEMfaceXleft); -} -/*! get Boundary Condition EM Field: FaceYright */ -int Collective::getBcEMfaceYright() { - return (bcEMfaceYright); -} -/*! get Boundary Condition EM Field: FaceYleft */ -int Collective::getBcEMfaceYleft() { - return (bcEMfaceYleft); -} -/*! get Boundary Condition EM Field: FaceZright */ -int Collective::getBcEMfaceZright() { - return (bcEMfaceZright); -} -/*! get Boundary Condition EM Field: FaceZleft */ -int Collective::getBcEMfaceZleft() { - return (bcEMfaceZleft); -} -/*! 
Get GEM Challenge parameters */ -double Collective::getDelta() { - return (delta); -} -double Collective::getB0x() { - return (B0x); -} -double Collective::getB0y() { - return (B0y); -} -double Collective::getB0z() { - return (B0z); -} -double Collective::getB1x(){ - return (B1x); -} -double Collective::getB1y(){ - return (B1y); -} -double Collective::getB1z(){ - return (B1z); -} -/*! get the boolean value for verbose results */ -bool Collective::getVerbose() { - return (verbose); -} -/*! get the boolean value for TrackParticleID */ -bool Collective::getTrackParticleID(int nspecies) { - return (TrackParticleID[nspecies]); -} -int Collective::getRestart_status() { - return (restart_status); -} -/*! get SaveDirName */ -string Collective::getSaveDirName() { - return (SaveDirName); -} -/*! get RestartDirName */ -string Collective::getRestartDirName() { - return (RestartDirName); -} -/*! get inputfile */ -string Collective::getinputfile() { - return (inputfile); -} -/*! get Case type */ -string Collective::getCase() { - return (Case); -} -/*! get simulation name */ -string Collective::getSimName() { - return (SimName); -} -/*! get output writing method */ -string Collective::getWriteMethod() { - return (wmethod); -} -/*! get Poisson correction flag */ -string Collective::getPoissonCorrection() { - return (PoissonCorrection); -} -/*! get last_cycle */ -int Collective::getLast_cycle() { - return (last_cycle); -} -/*! get the velocity of injection of the plasma from the wall */ -double Collective::getVinj() { - return (Vinj); -} -/*! get the converging tolerance for CG solver */ -double Collective::getCGtol() { - return (CGtol); -} -/*! get the converging tolerance for GMRES solver */ -double Collective::getGMREStol() { - return (GMREStol); -} -/*! get the numbers of iteration for the PC mover */ -int Collective::getNiterMover() { - return (NiterMover); -} -/*! output of fields */ -int Collective::getFieldOutputCycle() { - return (FieldOutputCycle); -} -/*! 
output of particles */ -int Collective::getParticlesOutputCycle() { - return (ParticlesOutputCycle); -} -/*! restart cycle */ -int Collective::getRestartOutputCycle() { - return (RestartOutputCycle); -} -/*! output of fields */ -int Collective::getDiagnosticsOutputCycle() { - return (DiagnosticsOutputCycle); -} diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index dd731c9d..d467dc8d 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -29,7 +29,7 @@ int c_Solver::Init(int argc, char **argv) { ns = col->getNs(); // get the number of particle species involved in simulation first_cycle = col->getLast_cycle() + 1; // get the last cycle from the restart // initialize the virtual cartesian topology - vct = new VCtopology3D(); + vct = new VCtopology3D(*col); // Check if we can map the processes into a matrix ordering defined in Collective.cpp if (nprocs != vct->getNprocs()) { if (myrank == 0) { From 51fa54df43b27816eabe6922e9a3cfd59ece6679 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 2 Oct 2013 00:04:16 +0200 Subject: [PATCH 040/118] issue #47: updating ipic help: configurable XLEN --- scripts/ipic-help | 11 +---------- scripts/ipic-help-mic | 17 +++-------------- scripts/tags | 7 ------- 3 files changed, 4 insertions(+), 31 deletions(-) delete mode 100644 scripts/tags diff --git a/scripts/ipic-help b/scripts/ipic-help index 5dc5cf41..6856364d 100755 --- a/scripts/ipic-help +++ b/scripts/ipic-help @@ -10,21 +10,12 @@ then cmake .. make # or "make -j" to compile in parallel - Before you build, you should first configure the number of MPI - processes you will use. To do so, you currently have to edit - "communication/VCtopology3D.cpp" (and then recompile in the build - directory). The lines you need to change are: - - XLEN = 2; - YLEN = 2; - ZLEN = 1; - To run the code you can use mkdir data mpiexec.hydra -n 4 -env OMP_NUM_THREADS=1 exec/iPic3D ../inputfiles/GEM.inp - where 4 = XLEN times YLEN times ZLEN. 
+ where 4 = XLEN times YLEN times ZLEN (defined in GEM.inp). Available subcommands: ipic help mic diff --git a/scripts/ipic-help-mic b/scripts/ipic-help-mic index ecb81bce..d983dd72 100755 --- a/scripts/ipic-help-mic +++ b/scripts/ipic-help-mic @@ -2,21 +2,10 @@ #if test $# -lt 1 #then echo ' - For the Xeon, you might want to change this to: - - XLEN = 4; - YLEN = 2; - ZLEN = 1; - - For the Xeon Phi, you might want: - - XLEN = 10; - YLEN = 5; - ZLEN = 1; - - Then to run the code you would use something like: + See "ipic help". Modifications are as follows. + + To run on the Xeon host processor, use something like: - mkdir data mpiexec.hydra -n 8 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp where 8 = XLEN times YLEN times ZLEN. diff --git a/scripts/tags b/scripts/tags deleted file mode 100644 index 9f0219d6..00000000 --- a/scripts/tags +++ /dev/null @@ -1,7 +0,0 @@ -ipic ipic 1 -ipic-ctags ipic-ctags 1 -ipic-help ipic-help 1 -ipic-help-ctags ipic-help-ctags 1 -ipic-help-mic ipic-help-mic 1 -makefiletags makefiletags 1 -tags tags 1 From c0855af0c05d5cf21d49599a3cd27e132d3d8f87 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 2 Oct 2013 00:22:16 +0200 Subject: [PATCH 041/118] minor correction to ipic-help --- scripts/ipic-help | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/ipic-help b/scripts/ipic-help index 6856364d..98a88e71 100755 --- a/scripts/ipic-help +++ b/scripts/ipic-help @@ -18,6 +18,8 @@ then where 4 = XLEN times YLEN times ZLEN (defined in GEM.inp). Available subcommands: + + ipic help ctags ipic help mic ' exit From 990cba03675fbf60bb472e502e960ecf80a361b7 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 2 Oct 2013 00:27:06 +0200 Subject: [PATCH 042/118] created makefile in main directory to give useful info for "make". 
--- makefile | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 makefile diff --git a/makefile b/makefile new file mode 100644 index 00000000..519cb605 --- /dev/null +++ b/makefile @@ -0,0 +1,12 @@ +# Convenience makefile to call scripts + +help: + scripts/ipic help + +tags: retags + +retags: + scripts/ipic-ctags + +#monitor: +# less +F data/ConservedQuantities.txt From 964e8b07070efe9b909eb002923d1cc9cdcd785f Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 2 Oct 2013 17:27:46 +0200 Subject: [PATCH 043/118] issue #41: doubled rate of summing moments via array(nx,ny,nz,10) --- fields/EMfields3D.cpp | 247 ++++++++++++++++++++++-------------------- fields/Moments.cpp | 14 ++- include/Alloc.h | 42 +++---- include/EMfields3D.h | 15 ++- include/Moments.h | 23 ++++ include/arraysfwd.h | 9 ++ inputfiles/GEM.inp | 4 +- 7 files changed, 213 insertions(+), 141 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index d878b3a8..026698df 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -193,12 +193,14 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : sizeMomentsArray = omp_thread_count(); momentsArray = new Moments*[sizeMomentsArray]; - moments10 = (arr4_double**) malloc(sizeof(void*)*sizeMomentsArray); + moments10Array = new Moments10*[sizeMomentsArray]; + //moments10 = (arr4_double**) malloc(sizeof(void*)*sizeMomentsArray); for(int i=0;iinit(nxn,nyn,nzn); - moments10[i] = new arr4_double(nxn,nyn,nzn,10); + //moments10[i] = new arr4_double(nxn,nyn,nzn,10); } } @@ -223,8 +225,8 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop double const*const q = pcls.getQall(); // const int is = pcls.get_ns(); - bool bmoments10 = false; - bool b10moments = true; // turn on doing it the old way + bool bmoments10 = true; + bool b10moments = false; // turn on doing it the old way // if b10moments double* rhons1d = &rhons[is][0][0][0]; @@ -251,8 +253,11 @@ void EMfields3D::sumMoments(const 
Particles3Dcomm& pcls, Grid * grid, VirtualTop int thread_num = omp_get_thread_num(); Moments& speciesMoments = fetch_momentsArray(thread_num); speciesMoments.set_to_zero(); - arr4_double moments = fetch_moments10(thread_num); - moments.setall(0.); + Moments10& speciesMoments10 = fetch_moments10Array(thread_num); + speciesMoments10.set_to_zero(); + arr4_double moments = speciesMoments10.fetch_arr(); + //arr4_double moments = fetch_moments10(thread_num); + //moments.setall(0.); // arr3_double rho = speciesMoments.fetch_rho(); arr3_double Jx = speciesMoments.fetch_Jx(); @@ -323,93 +328,102 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop if(bmoments10) { - moments[ix ][iy ][iz ][0] += velmoments[0]*weight000; - moments[ix ][iy ][iz ][1] += velmoments[1]*weight000; - moments[ix ][iy ][iz ][2] += velmoments[2]*weight000; - moments[ix ][iy ][iz ][3] += velmoments[3]*weight000; - moments[ix ][iy ][iz ][4] += velmoments[4]*weight000; - moments[ix ][iy ][iz ][5] += velmoments[5]*weight000; - moments[ix ][iy ][iz ][6] += velmoments[6]*weight000; - moments[ix ][iy ][iz ][7] += velmoments[7]*weight000; - moments[ix ][iy ][iz ][8] += velmoments[8]*weight000; - moments[ix ][iy ][iz ][9] += velmoments[9]*weight000; - - moments[ix ][iy ][iz-1][0] += velmoments[0]*weight001; - moments[ix ][iy ][iz-1][1] += velmoments[1]*weight001; - moments[ix ][iy ][iz-1][2] += velmoments[2]*weight001; - moments[ix ][iy ][iz-1][3] += velmoments[3]*weight001; - moments[ix ][iy ][iz-1][4] += velmoments[4]*weight001; - moments[ix ][iy ][iz-1][5] += velmoments[5]*weight001; - moments[ix ][iy ][iz-1][6] += velmoments[6]*weight001; - moments[ix ][iy ][iz-1][7] += velmoments[7]*weight001; - moments[ix ][iy ][iz-1][8] += velmoments[8]*weight001; - moments[ix ][iy ][iz-1][9] += velmoments[9]*weight001; - - moments[ix ][iy-1][iz ][0] += velmoments[0]*weight010; - moments[ix ][iy-1][iz ][1] += velmoments[1]*weight010; - moments[ix ][iy-1][iz ][2] += 
velmoments[2]*weight010; - moments[ix ][iy-1][iz ][3] += velmoments[3]*weight010; - moments[ix ][iy-1][iz ][4] += velmoments[4]*weight010; - moments[ix ][iy-1][iz ][5] += velmoments[5]*weight010; - moments[ix ][iy-1][iz ][6] += velmoments[6]*weight010; - moments[ix ][iy-1][iz ][7] += velmoments[7]*weight010; - moments[ix ][iy-1][iz ][8] += velmoments[8]*weight010; - moments[ix ][iy-1][iz ][9] += velmoments[9]*weight010; - - moments[ix ][iy-1][iz-1][0] += velmoments[0]*weight011; - moments[ix ][iy-1][iz-1][1] += velmoments[1]*weight011; - moments[ix ][iy-1][iz-1][2] += velmoments[2]*weight011; - moments[ix ][iy-1][iz-1][3] += velmoments[3]*weight011; - moments[ix ][iy-1][iz-1][4] += velmoments[4]*weight011; - moments[ix ][iy-1][iz-1][5] += velmoments[5]*weight011; - moments[ix ][iy-1][iz-1][6] += velmoments[6]*weight011; - moments[ix ][iy-1][iz-1][7] += velmoments[7]*weight011; - moments[ix ][iy-1][iz-1][8] += velmoments[8]*weight011; - moments[ix ][iy-1][iz-1][9] += velmoments[9]*weight011; - - moments[ix-1][iy ][iz ][0] += velmoments[0]*weight100; - moments[ix-1][iy ][iz ][1] += velmoments[1]*weight100; - moments[ix-1][iy ][iz ][2] += velmoments[2]*weight100; - moments[ix-1][iy ][iz ][3] += velmoments[3]*weight100; - moments[ix-1][iy ][iz ][4] += velmoments[4]*weight100; - moments[ix-1][iy ][iz ][5] += velmoments[5]*weight100; - moments[ix-1][iy ][iz ][6] += velmoments[6]*weight100; - moments[ix-1][iy ][iz ][7] += velmoments[7]*weight100; - moments[ix-1][iy ][iz ][8] += velmoments[8]*weight100; - moments[ix-1][iy ][iz ][9] += velmoments[9]*weight100; - - moments[ix-1][iy ][iz-1][0] += velmoments[0]*weight101; - moments[ix-1][iy ][iz-1][1] += velmoments[1]*weight101; - moments[ix-1][iy ][iz-1][2] += velmoments[2]*weight101; - moments[ix-1][iy ][iz-1][3] += velmoments[3]*weight101; - moments[ix-1][iy ][iz-1][4] += velmoments[4]*weight101; - moments[ix-1][iy ][iz-1][5] += velmoments[5]*weight101; - moments[ix-1][iy ][iz-1][6] += velmoments[6]*weight101; - 
moments[ix-1][iy ][iz-1][7] += velmoments[7]*weight101; - moments[ix-1][iy ][iz-1][8] += velmoments[8]*weight101; - moments[ix-1][iy ][iz-1][9] += velmoments[9]*weight101; - - moments[ix-1][iy-1][iz ][0] += velmoments[0]*weight110; - moments[ix-1][iy-1][iz ][1] += velmoments[1]*weight110; - moments[ix-1][iy-1][iz ][2] += velmoments[2]*weight110; - moments[ix-1][iy-1][iz ][3] += velmoments[3]*weight110; - moments[ix-1][iy-1][iz ][4] += velmoments[4]*weight110; - moments[ix-1][iy-1][iz ][5] += velmoments[5]*weight110; - moments[ix-1][iy-1][iz ][6] += velmoments[6]*weight110; - moments[ix-1][iy-1][iz ][7] += velmoments[7]*weight110; - moments[ix-1][iy-1][iz ][8] += velmoments[8]*weight110; - moments[ix-1][iy-1][iz ][9] += velmoments[9]*weight110; - - moments[ix-1][iy-1][iz-1][0] += velmoments[0]*weight111; - moments[ix-1][iy-1][iz-1][1] += velmoments[1]*weight111; - moments[ix-1][iy-1][iz-1][2] += velmoments[2]*weight111; - moments[ix-1][iy-1][iz-1][3] += velmoments[3]*weight111; - moments[ix-1][iy-1][iz-1][4] += velmoments[4]*weight111; - moments[ix-1][iy-1][iz-1][5] += velmoments[5]*weight111; - moments[ix-1][iy-1][iz-1][6] += velmoments[6]*weight111; - moments[ix-1][iy-1][iz-1][7] += velmoments[7]*weight111; - moments[ix-1][iy-1][iz-1][8] += velmoments[8]*weight111; - moments[ix-1][iy-1][iz-1][9] += velmoments[9]*weight111; + arr1_double_fetch moments000 = moments[ix ][iy ][iz ]; + arr1_double_fetch moments001 = moments[ix ][iy ][iz-1]; + arr1_double_fetch moments010 = moments[ix ][iy-1][iz ]; + arr1_double_fetch moments011 = moments[ix ][iy-1][iz-1]; + arr1_double_fetch moments100 = moments[ix-1][iy ][iz ]; + arr1_double_fetch moments101 = moments[ix-1][iy ][iz-1]; + arr1_double_fetch moments110 = moments[ix-1][iy-1][iz ]; + arr1_double_fetch moments111 = moments[ix-1][iy-1][iz-1]; + + moments000[0] += velmoments[0]*weight000; + moments000[1] += velmoments[1]*weight000; + moments000[2] += velmoments[2]*weight000; + moments000[3] += velmoments[3]*weight000; + 
moments000[4] += velmoments[4]*weight000; + moments000[5] += velmoments[5]*weight000; + moments000[6] += velmoments[6]*weight000; + moments000[7] += velmoments[7]*weight000; + moments000[8] += velmoments[8]*weight000; + moments000[9] += velmoments[9]*weight000; + + moments001[0] += velmoments[0]*weight001; + moments001[1] += velmoments[1]*weight001; + moments001[2] += velmoments[2]*weight001; + moments001[3] += velmoments[3]*weight001; + moments001[4] += velmoments[4]*weight001; + moments001[5] += velmoments[5]*weight001; + moments001[6] += velmoments[6]*weight001; + moments001[7] += velmoments[7]*weight001; + moments001[8] += velmoments[8]*weight001; + moments001[9] += velmoments[9]*weight001; + + moments010[0] += velmoments[0]*weight010; + moments010[1] += velmoments[1]*weight010; + moments010[2] += velmoments[2]*weight010; + moments010[3] += velmoments[3]*weight010; + moments010[4] += velmoments[4]*weight010; + moments010[5] += velmoments[5]*weight010; + moments010[6] += velmoments[6]*weight010; + moments010[7] += velmoments[7]*weight010; + moments010[8] += velmoments[8]*weight010; + moments010[9] += velmoments[9]*weight010; + + moments011[0] += velmoments[0]*weight011; + moments011[1] += velmoments[1]*weight011; + moments011[2] += velmoments[2]*weight011; + moments011[3] += velmoments[3]*weight011; + moments011[4] += velmoments[4]*weight011; + moments011[5] += velmoments[5]*weight011; + moments011[6] += velmoments[6]*weight011; + moments011[7] += velmoments[7]*weight011; + moments011[8] += velmoments[8]*weight011; + moments011[9] += velmoments[9]*weight011; + + moments100[0] += velmoments[0]*weight100; + moments100[1] += velmoments[1]*weight100; + moments100[2] += velmoments[2]*weight100; + moments100[3] += velmoments[3]*weight100; + moments100[4] += velmoments[4]*weight100; + moments100[5] += velmoments[5]*weight100; + moments100[6] += velmoments[6]*weight100; + moments100[7] += velmoments[7]*weight100; + moments100[8] += velmoments[8]*weight100; + 
moments100[9] += velmoments[9]*weight100; + + moments101[0] += velmoments[0]*weight101; + moments101[1] += velmoments[1]*weight101; + moments101[2] += velmoments[2]*weight101; + moments101[3] += velmoments[3]*weight101; + moments101[4] += velmoments[4]*weight101; + moments101[5] += velmoments[5]*weight101; + moments101[6] += velmoments[6]*weight101; + moments101[7] += velmoments[7]*weight101; + moments101[8] += velmoments[8]*weight101; + moments101[9] += velmoments[9]*weight101; + + moments110[0] += velmoments[0]*weight110; + moments110[1] += velmoments[1]*weight110; + moments110[2] += velmoments[2]*weight110; + moments110[3] += velmoments[3]*weight110; + moments110[4] += velmoments[4]*weight110; + moments110[5] += velmoments[5]*weight110; + moments110[6] += velmoments[6]*weight110; + moments110[7] += velmoments[7]*weight110; + moments110[8] += velmoments[8]*weight110; + moments110[9] += velmoments[9]*weight110; + + moments111[0] += velmoments[0]*weight111; + moments111[1] += velmoments[1]*weight111; + moments111[2] += velmoments[2]*weight111; + moments111[3] += velmoments[3]*weight111; + moments111[4] += velmoments[4]*weight111; + moments111[5] += velmoments[5]*weight111; + moments111[6] += velmoments[6]*weight111; + moments111[7] += velmoments[7]*weight111; + moments111[8] += velmoments[8]*weight111; + moments111[9] += velmoments[9]*weight111; //double weight[2][2][2]; //weight[0][0][0]=weight000; @@ -617,35 +631,35 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop { // #pragma omp critical - for(int i=0;ifree(); + delete moments10Array[i]; + //moments10[i]->free(); } delete [] momentsArray; - free(moments10); + delete [] moments10Array; + //delete [] moments10; + //free(moments10); } diff --git a/fields/Moments.cpp b/fields/Moments.cpp index 6c31a2f1..32376155 100644 --- a/fields/Moments.cpp +++ b/fields/Moments.cpp @@ -1,8 +1,20 @@ #include "Moments.h" #include "Alloc.h" +void Moments10::set_to_zero() +{ + #pragma omp parallel 
for collapse(4) + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + for (register int m = 0; m < 10; m++) + { + arr[i][j][k][m] = 0.0; + } +} + void Moments::set_to_zero() { - // #pragma omp parallel for collapse(1) + #pragma omp parallel for collapse(3) for (register int i = 0; i < nx; i++) for (register int j = 0; j < ny; j++) for (register int k = 0; k < nz; k++) { diff --git a/include/Alloc.h b/include/Alloc.h index 8e589321..e8456785 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -58,6 +58,10 @@ code to compile on the latest intel compiler (2013) and on g++ 4.0 (2005); g++ 4.2 (2007) compiled (but unfortunately, for my g++ 4.2, iPic3D suffered from stack frame corruption.) + // + Note that the directive + #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) + appears not only here but also in arraysfwd.h */ #define ALIGNMENT (64) #ifdef __INTEL_COMPILER @@ -217,26 +221,26 @@ namespace iPic3D // classes to dereference arrays. // - // ArrayGetN is essentially a dumbed-down version of ArrN with + // array_fetchN is essentially a dumbed-down version of ArrN with // an index shift applied to the underlying array. The purpose - // of ArrayGetN is to allow elements of multidimensional arrays + // of array_fetchN is to allow elements of multidimensional arrays // to be accessed with a calculated one-dimensional index while // using chained operator[] syntax (e.g. myarr[i][j]), i.e. the // same syntax as is used for native or nested arrays. This // implementation is likely to be slow unless optimization is // turned on, allowing the compiler to figure out that the whole - // chain of calls to the operator[] methods and to the ArrayGetN + // chain of calls to the operator[] methods and to the array_fetchN // constructors reduces to computing a one-dimensional subscript // used to access a one-dimensional array. 
// template - class ArrayGet1 + class array_fetch1 { type* const __restrict__ arr; const size_t S1; const size_t shift; public: - inline ArrayGet1(type*const arr_, size_t k, size_t s1) : + inline array_fetch1(type*const arr_, size_t k, size_t s1) : arr(arr_), shift(k), S1(s1) {} inline type& operator[](size_t n1){ @@ -247,34 +251,34 @@ namespace iPic3D }; template - class ArrayGet2 + class array_fetch2 { type* const __restrict__ arr; const size_t shift; const size_t S2, S1; public: - inline ArrayGet2(type*const arr_, size_t k, size_t s2, size_t s1) : + inline array_fetch2(type*const arr_, size_t k, size_t s2, size_t s1) : arr(arr_), shift(k), S2(s2), S1(s1) {} - inline ArrayGet1 operator[](size_t n2){ + inline array_fetch1 operator[](size_t n2){ check_bounds(n2,S2); - return ArrayGet1(arr, (shift+n2)*S1, S1); + return array_fetch1(arr, (shift+n2)*S1, S1); } }; template - class ArrayGet3 + class array_fetch3 { type* const __restrict__ arr; const size_t shift; const size_t S3, S2, S1; public: - inline ArrayGet3(type*const arr_, size_t k, size_t s3, size_t s2, size_t s1) : + inline array_fetch3(type*const arr_, size_t k, size_t s3, size_t s2, size_t s1) : arr(arr_), shift(k), S3(s3), S2(s2), S1(s1) {} - inline ArrayGet2 operator[](size_t n3){ + inline array_fetch2 operator[](size_t n3){ check_bounds(n3, S3); - return ArrayGet2(arr, (shift+n3)*S2, S2, S1); + return array_fetch2(arr, (shift+n3)*S2, S2, S1); } }; @@ -426,9 +430,9 @@ namespace iPic3D arr(*in) { } // dereference via calculated index - inline ArrayGet1 operator[](size_t n2){ + inline array_fetch1 operator[](size_t n2){ check_bounds(n2, S2); - return ArrayGet1(arr, n2*S1, S1); + return array_fetch1(arr, n2*S1, S1); } inline size_t getidx(size_t n2, size_t n1) const { @@ -530,9 +534,9 @@ namespace iPic3D { } void free(){ delArray3((type***)arr3); } #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) - inline ArrayGet2 operator[](size_t n3){ + inline array_fetch2 operator[](size_t n3){ check_bounds(n3, S3); - 
return ArrayGet2(arr, n3*S2, S2, S1); + return array_fetch2(arr, n3*S2, S2, S1); } #else // this causes operator[] to dereference via chained pointer @@ -638,9 +642,9 @@ namespace iPic3D const_array_ref4(in,s4,s3,s2,s1) { } #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) - inline ArrayGet3 operator[](size_t n4){ + inline array_fetch3 operator[](size_t n4){ check_bounds(n4, S4); - return ArrayGet3(arr, n4*S3, S3, S2, S1); + return array_fetch3(arr, n4*S3, S3, S2, S1); } #else operator type****(){ return (type****) arr4; } diff --git a/include/EMfields3D.h b/include/EMfields3D.h index b2105b0b..07999eeb 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -32,6 +32,7 @@ using std::endl; class Particles3Dcomm; class Moments; +class Moments10; class EMfields3D // :public Field { public: @@ -259,12 +260,17 @@ class EMfields3D // :public Field Moments& fetch_momentsArray(int i){ assert_le(0,i); assert_le(i,sizeMomentsArray); - return *momentsArray[i]; + return *(momentsArray[i]); } - arr4_double fetch_moments10(int i){ + //arr4_double fetch_moments10(int i){ + // assert_le(0,i); + // assert_le(i,sizeMomentsArray); + // return *(moments10[i]); + //} + Moments10& fetch_moments10Array(int i){ assert_le(0,i); assert_le(i,sizeMomentsArray); - return *moments10[i]; + return *(moments10Array[i]); } /*! 
print electromagnetic fields info */ @@ -390,7 +396,8 @@ class EMfields3D // :public Field /* temporary arrays for summing moments */ int sizeMomentsArray; Moments **momentsArray; - arr4_double** moments10; + Moments10 **moments10Array; + //arr4_double** moments10; // ******************************************************************************* diff --git a/include/Moments.h b/include/Moments.h index fd28e169..33c706fa 100644 --- a/include/Moments.h +++ b/include/Moments.h @@ -2,6 +2,29 @@ #define Moments_H #include "Alloc.h" +class Moments10 +{ + private: + arr4_double arr; + int nx; + int ny; + int nz; + public: + void set_to_zero(); + + // fetch accessors (write access) + arr4_double fetch_arr() { return arr; } + + Moments10(int nxn, int nyn, int nzn) : + nx(nxn), + ny(nyn), + nz(nzn), + arr (nxn, nyn, nzn,10) + { + }; + ~Moments10(){}; +}; + // class to accumulate node-centered species moments // class Moments { diff --git a/include/arraysfwd.h b/include/arraysfwd.h index 41fdbc19..9341a381 100644 --- a/include/arraysfwd.h +++ b/include/arraysfwd.h @@ -49,4 +49,13 @@ typedef iPic3D::array1 array1_double; typedef iPic3D::array2 array2_double; typedef iPic3D::array3 array3_double; typedef iPic3D::array4 array4_double; +// This directive should be consistent with the directives in Alloc.h +#if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) +typedef iPic3D::array_fetch1 arr1_double_fetch; +typedef iPic3D::array_get1 arr1_double_get; +#else +typedef double* arr1_double_fetch; +typedef double* arr1_double_get; +#endif + #endif diff --git a/inputfiles/GEM.inp b/inputfiles/GEM.inp index e6f9bc23..f3eb5aab 100644 --- a/inputfiles/GEM.inp +++ b/inputfiles/GEM.inp @@ -53,8 +53,8 @@ nzc = 1 # nzc = number of cells - z direction # %%%%%%%%%%%%%% MPI TOPOLOGY %%%%%%%%%%%%%% # number of MPI subdomains in each direction -XLEN = 4 -YLEN = 4 +XLEN = 2 +YLEN = 2 ZLEN = 1 # topology of subdomains in each dimension (1=true, 0=false) PERIODICX = 1 From 
7c6db4e769f6b23a00facc75dc1d29662e32baa5 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 2 Oct 2013 18:13:20 +0200 Subject: [PATCH 044/118] commenting out deprecated TenMoments class with preprocessor directives --- fields/EMfields3D.cpp | 150 +++++++++++++++------------------- fields/Moments.cpp | 4 +- include/EMfields3D.h | 23 +++--- include/Moments.h | 8 +- particles/Particles3Dcomm.cpp | 1 - 5 files changed, 83 insertions(+), 103 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 026698df..d37a90fb 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -192,15 +192,16 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : injFieldsRear = new injInfoFields(nxn, nyn, nzn); sizeMomentsArray = omp_thread_count(); - momentsArray = new Moments*[sizeMomentsArray]; + #ifdef TENMOMENTS + tenMomentsArray = new TenMoments*[sizeMomentsArray]; + #endif // TENMOMENTS moments10Array = new Moments10*[sizeMomentsArray]; - //moments10 = (arr4_double**) malloc(sizeof(void*)*sizeMomentsArray); for(int i=0;iinit(nxn,nyn,nzn); - //moments10[i] = new arr4_double(nxn,nyn,nzn,10); } } @@ -226,7 +227,6 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop // const int is = pcls.get_ns(); bool bmoments10 = true; - bool b10moments = false; // turn on doing it the old way // if b10moments double* rhons1d = &rhons[is][0][0][0]; @@ -251,14 +251,9 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop #pragma omp parallel { int thread_num = omp_get_thread_num(); - Moments& speciesMoments = fetch_momentsArray(thread_num); + #ifdef TENMOMENTS + TenMoments& speciesMoments = fetch_momentsArray(thread_num); speciesMoments.set_to_zero(); - Moments10& speciesMoments10 = fetch_moments10Array(thread_num); - speciesMoments10.set_to_zero(); - arr4_double moments = speciesMoments10.fetch_arr(); - //arr4_double moments = fetch_moments10(thread_num); - //moments.setall(0.); - // arr3_double rho = 
speciesMoments.fetch_rho(); arr3_double Jx = speciesMoments.fetch_Jx(); arr3_double Jy = speciesMoments.fetch_Jy(); @@ -269,6 +264,10 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop arr3_double Pyy = speciesMoments.fetch_Pyy(); arr3_double Pyz = speciesMoments.fetch_Pyz(); arr3_double Pzz = speciesMoments.fetch_Pzz(); + #endif // TENMOMENTS + Moments10& speciesMoments10 = fetch_moments10Array(thread_num); + speciesMoments10.set_to_zero(); + arr4_double moments = speciesMoments10.fetch_arr(); // The following loop is expensive, so it is wise to assume that the // compiler is stupid. Therefore we should on the one hand // expand things out and on the other hand avoid repeating computations. @@ -444,7 +443,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop //} } - if(b10moments) + #ifdef TENMOMENTS { // use the weight to distribute the moments // @@ -549,29 +548,16 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop Pzz[ix-1][iy-1][iz ] += wwi*weight110; Pzz[ix-1][iy-1][iz-1] += wwi*weight111; } + #endif // TENMOMENTS - // why on earth do I observe the following: - // * without openmp, b10moments and bmoments10 gives same results, - // * b10moments gives same results with and without openmp, and - // * bmoments10 gives wrong results when I use openmp. - // I'm using Moments class and moments array exactly the same way - // as far as openmp is concerned... To isolate the problem, - // gradually morph Moments class until implemented via arr4_double... - // Problem in constructor? 
- // - // - if(b10moments && bmoments10) + #ifdef TENMOMENTS { // check work for(int jx=0;jx<2;jx++) for(int jy=0;jy<2;jy++) for(int jz=0;jz<2;jz++) { - //dprintf("gothere"); - //dprintf("%24.16f == rho[ix-jx][iy-jy][iz-jz]", rho[ix-jx][iy-jy][iz-jz]); - //dprintf("%24.16f == moments[ix-jx][iy-jy][iz-jz][0]", moments[ix-jx][iy-jy][iz-jz][0]); assert_eq(rho[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][0]); - //dprintf("gothere"); assert_eq(Jx [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][1]); assert_eq(Jy [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][2]); assert_eq(Jz [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][3]); @@ -583,53 +569,53 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop assert_eq(Pzz[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][9]); } } + #endif // TENMOMENTS } // split up the reduction tasks. // - if(b10moments) + //{ + // // + // // One-dimensional array access is presumably + // // more efficient on poor compilers. 
+ // double* rho1d = &rho[0][0][0]; + // double* Jx1d = &Jx [0][0][0]; + // double* Jy1d = &Jy [0][0][0]; + // double* Jz1d = &Jz [0][0][0]; + // double* Pxx1d = &Pxx[0][0][0]; + // double* Pxy1d = &Pxy[0][0][0]; + // double* Pxz1d = &Pxz[0][0][0]; + // double* Pyy1d = &Pyy[0][0][0]; + // double* Pyz1d = &Pyz[0][0][0]; + // double* Pzz1d = &Pzz[0][0][0]; + // //// + // assert_eq(speciesMoments.get_nx(), nxn); + // assert_eq(speciesMoments.get_ny(), nyn); + // assert_eq(speciesMoments.get_nz(), nzn); + // const int numel = nxn*nyn*nzn; + // #pragma omp critical + // for(int i=0;ifree(); - } - delete [] momentsArray; + #ifdef TENMOMENTS + for(int i=0;i Date: Fri, 4 Oct 2013 16:27:56 +0200 Subject: [PATCH 045/118] issue #49: created ipic.py to replace ipic scripts --- scripts/ipic.py | 232 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100755 scripts/ipic.py diff --git a/scripts/ipic.py b/scripts/ipic.py new file mode 100755 index 00000000..7ae763e7 --- /dev/null +++ b/scripts/ipic.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python + +import sys +import getopt +# http://docs.python.org/2/library/collections.html#collections.deque +from collections import deque # double-ended queue +import os +#from optparse import OptionParser + +# useful documentation: +# +# http://effbot.org/zone/python-list.htm +# http://pymotw.com/2/subprocess/ +# http://stackoverflow.com/questions/3777301/how-to-call-a-shell-script-from-python-code + +def ipic_ctags(args): + # create tags file using ctags + create_tags_command = \ + '''find . -name '*.cpp' -or -name '*.h' | xargs ctags --extra=+qf''' + print create_tags_command + os.system(create_tags_command) + # sort tags file + sort_tags_command = '''LC_ALL=C sort -u tags -o tags''' + print sort_tags_command + os.system(sort_tags_command) + +def ipic_help(): + print ''' + To build, in the iPic3D directory you can use: + + rm -rf build # if necessary + mkdir build + cd build + cmake .. 
+ make # or "make -j" to compile in parallel + + To run the code you can use + + mkdir data + mpiexec -n 4 exec/iPic3D ../inputfiles/GEM.inp + + where 4 = XLEN times YLEN times ZLEN (defined in GEM.inp). + + Available subcommands: + + ''', progname, '''help ctags + ''', progname, '''help mic + ''' + +def ipic_help_mic(args): + print ''' + See "ipic help". Modifications are as follows. + + To run on the Xeon host processor, use something like: + + mpiexec.hydra -n 8 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp + + where 8 = XLEN times YLEN times ZLEN. + + If you want to cross-compile for the MIC, then the instructions are + different: + + mkdir build.phi + cd build.phi + cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.XeonPhi + make -j + + And to run you use, e.g.: + + mkdir data + mpiexec.hydra -host knc2-mic0 -n 50 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp + + where 50 = XLEN times YLEN times ZLEN. + ''' + +def ipic_help_ctags(args): + print ''' + Make sure that you are in the source code directory + and then run + + ''', progname, '''ctags + ''' + +def ipic_help_git(args): + print ''' + ### This stub gives examples of git commands ### + + # show branch information + git branch -avv + # examining the .git directory reveals a wealth of information, e.g.: + cat .git/config + # with --stat all files checked in are displayed. + git log --stat + # for the following I just do "git tree" (see .gitconfig below): + git log --oneline --decorate --graph --branches --source + git status # shows file statuses + git remote -v # show remote repositories + # show commits in chronological order. + git reflog + # git reflog is useful to get the sha-1 hash of a commit + # that you recently made and whose branch you accidentally + # deleted, making it no longer reachable. Note that + # each snapshot that you commit should stay in its local + # repository for 90 days before being garbage collected + # unless you do something like "git gc". 
See also + # http://gitready.com/advanced/2009/01/17/restoring-lost-commits.html + # + # show file + git show mybranch:myfile + eg cat myfile # slightly nicer than git show + # show who checked in what line when under what commit. + git blame myfile # on current branch + git blame amaya-library iPic3D.cpp + + for modification: + + # initialize a repository + mkdir localrepository; cd localrepository; git init + # creating/removing remote: + git remote add myremote https://github.com/alecjohnson/iPic3D.git + git remote rm myremote + # get all branches and their filesystem snapshots + # from myremote that are not already in localrepository + git fetch myremote + # check in mods + git stage myfile + git rm oldfile + git commit + # modify a commit message + git commit --amend + # create a branch and check it out + git checkout -b newbranch + # push branch to server + eg push --branch newbranch myremote + # pull changes from server into current branch + git pull + # delete a branch on server (!): + git push myremote --delete mybranch + + # example of global configuration file: + + $ cat ~/.gitconfig + [user] + name = eajohnson + email = e.alec.johnson@gmail.com + [alias] + tree = log --oneline --decorate --graph --branches --source + undo-commit = reset --soft HEAD~1 + ''' + +def help(args): + if len(args) == 0: + ipic_help() + sys.exit() + + command = deque.popleft(args) + if command == "mic": + ipic_help_mic(args) + elif command == "ctags": + ipic_help_ctags(args) + elif command == "git": + ipic_help_git(args) + else: + print "ipic help", command, "is not supported" + sys.exit(-1) + +def usage(): + print ''' + usage: ''', progname, ''' [options] + + Available commands: + ''', progname, '''ctags + ''', progname, '''help + ''' + +def main(): + + global progname + progname = os.path.basename(sys.argv[0]) + global dirname + dirname = os.path.dirname(sys.argv[0]) + + # it might be better to use the argparse module rather than getopt, + # but unfortunately argparse is only 
available beginning with python 2.7 + # and most HPC platforms seem to have python 2.6 installed. + # optparse has been deprecated and does not seem to be in python 3; + # note, however, that argparse was initially an extension of optparse + # before giving up on backward compatibility. + # + try: + opts, args = getopt.getopt(sys.argv[1:], 'ho:', ['help', 'output=']) + except getopt.GetoptError, e: + if e.opt == 'o' and 'requires argument' in e.msg: + print 'ERROR: -o requires filename' + else: + usage() + sys.exit(-1) + + for o, a in opts: + if o in ("-h", "--help"): + usage() + sys.exit() + elif o in ("-o", "--output"): + output = a + #else: + # assert False, "unhandled option" + + numargs = len(args) + if numargs==0: + usage() + sys.exit() + + #print args + args = deque(args) + command = deque.popleft(args) + #print list(args) + + if command == "help": + help(args) + elif command == "ctags": + ipic_ctags(args) + #print "ctags not yet implemented" + else: + print progname, command, "not supported" + sys.exit(-1) + + #print os.path.basename(__file__) + #print os.path.dirname(__file__) + +if __name__ == '__main__': + main() + From 8ab8b3a6b5b667b41860ddc92bc3f4b764ca9ea7 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 4 Oct 2013 16:31:32 +0200 Subject: [PATCH 046/118] issue #49: removed ipic shell scripts --- scripts/ipic | 16 ---------------- scripts/ipic-ctags | 8 -------- scripts/ipic-help | 30 ------------------------------ scripts/ipic-help-ctags | 22 ---------------------- scripts/ipic-help-mic | 28 ---------------------------- scripts/makefiletags | 14 -------------- 6 files changed, 118 deletions(-) delete mode 100755 scripts/ipic delete mode 100755 scripts/ipic-ctags delete mode 100755 scripts/ipic-help delete mode 100755 scripts/ipic-help-ctags delete mode 100755 scripts/ipic-help-mic delete mode 100755 scripts/makefiletags diff --git a/scripts/ipic b/scripts/ipic deleted file mode 100755 index 25158819..00000000 --- a/scripts/ipic +++ /dev/null @@ 
-1,16 +0,0 @@ -#!/bin/sh -if test $# -lt 1 -then - echo ' - usage: ipic - - Available ipic commands: - ipic ctags - ipic help -' - exit -fi -DIRNAME=`dirname $0` -APPENDIX="$1" -shift -exec "${DIRNAME}/ipic-${APPENDIX}" "$@" diff --git a/scripts/ipic-ctags b/scripts/ipic-ctags deleted file mode 100755 index df4767be..00000000 --- a/scripts/ipic-ctags +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -DIRNAME=`dirname $0` -echo creating tags file using ctags -find . -name '*.cpp' -or -name '*.h' | xargs ctags --extra=+q -echo creating tag for each C++ file -find . -name '*.cpp' -or -name '*.h' | xargs $DIRNAME/makefiletags >> tags -echo sorting tags file -LC_ALL=C sort -u tags -o tags diff --git a/scripts/ipic-help b/scripts/ipic-help deleted file mode 100755 index 98a88e71..00000000 --- a/scripts/ipic-help +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh -if test $# -lt 1 -then - echo ' - To build, in the iPic3D directory you can use: - - rm -rf build # if necessary - mkdir build - cd build - cmake .. - make # or "make -j" to compile in parallel - - To run the code you can use - - mkdir data - mpiexec.hydra -n 4 -env OMP_NUM_THREADS=1 exec/iPic3D ../inputfiles/GEM.inp - - where 4 = XLEN times YLEN times ZLEN (defined in GEM.inp). - - Available subcommands: - - ipic help ctags - ipic help mic -' - exit -fi -DIRNAME=`dirname $0` -APPENDIX="$1" -shift -exec "${DIRNAME}/ipic-help-${APPENDIX}" "$@" diff --git a/scripts/ipic-help-ctags b/scripts/ipic-help-ctags deleted file mode 100755 index e80c00c1..00000000 --- a/scripts/ipic-help-ctags +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh -DIRNAME=`dirname $0` -SCRIPTSDIRNAME=`cd "${DIRNAME}"; pwd` -PARENTOFSCRIPTSDIRNAME=`dirname "${SCRIPTSDIRNAME}"` -#if test $# -lt 1 -#then - echo ' - Make sure that you are in the source code directory via e.g. - - cd '"${PARENTOFSCRIPTSDIRNAME}"' - - and then run the script, e.g. via - - '"${SCRIPTSDIRNAME}"'/ipic ctags - - or - - ipic ctags - - if you have '"${DIRNAME}"' in your path. 
-' - diff --git a/scripts/ipic-help-mic b/scripts/ipic-help-mic deleted file mode 100755 index d983dd72..00000000 --- a/scripts/ipic-help-mic +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh -#if test $# -lt 1 -#then - echo ' - See "ipic help". Modifications are as follows. - - To run on the Xeon host processor, use something like: - - mpiexec.hydra -n 8 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp - - where 8 = XLEN times YLEN times ZLEN. - - If you want to cross-compile for the MIC, then the instructions are - different: - - mkdir build.phi - cd build.phi - cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.XeonPhi - make -j - - And to run you use, e.g.: - - mkdir data - mpiexec.hydra -host knc2-mic0 -n 50 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp - - where 50 = XLEN times YLEN times ZLEN. -' - diff --git a/scripts/makefiletags b/scripts/makefiletags deleted file mode 100755 index da0fb7d1..00000000 --- a/scripts/makefiletags +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -# generate a tag for each file name argument -ls $* 2>&1| sed 's/ /\ -/g' \ -| perl -ne ' - if(m@/@) { - m@(.*?)/([^/\n]+)$@; - print "$2\t$1/$2\t1\n"; - } else { - m@(.*)@; - print "$1\t$1\t1\n"; - } - ' -echo "tags tags 1" >> tags From 4f74c32a626403a42086a89901d511701c179571 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 4 Oct 2013 16:37:15 +0200 Subject: [PATCH 047/118] iss #49: scripts/ipic linked to scripts/ipic.py --- makefile | 2 +- scripts/ipic | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 120000 scripts/ipic diff --git a/makefile b/makefile index 519cb605..3ca3c2a6 100644 --- a/makefile +++ b/makefile @@ -6,7 +6,7 @@ help: tags: retags retags: - scripts/ipic-ctags + scripts/ipic ctags #monitor: # less +F data/ConservedQuantities.txt diff --git a/scripts/ipic b/scripts/ipic new file mode 120000 index 00000000..e264ff86 --- /dev/null +++ b/scripts/ipic @@ -0,0 +1 @@ +ipic.py \ No newline at end of file From 
4b92444ed6a1c71a6effb4ae7ba46f029e26569c Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 7 Oct 2013 10:17:13 +0200 Subject: [PATCH 048/118] give environment info in response to "ipic help deep" --- scripts/ipic.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/scripts/ipic.py b/scripts/ipic.py index 7ae763e7..90948c4b 100755 --- a/scripts/ipic.py +++ b/scripts/ipic.py @@ -45,6 +45,7 @@ def ipic_help(): ''', progname, '''help ctags ''', progname, '''help mic + ''', progname, '''help deep ''' def ipic_help_mic(args): @@ -71,6 +72,21 @@ def ipic_help_mic(args): mpiexec.hydra -host knc2-mic0 -n 50 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp where 50 = XLEN times YLEN times ZLEN. + + See also: + ''', progname, '''help deep + ''' + +def ipic_help_deep(args): + print ''' + DEEP needs the following modules: + + module load hdf5/1.8.10-patch1 + module load knc/intel_mpi/4.1.0.030 + module load knc/mic + + For instructions on how to build and run, see + ''', progname, '''help mic ''' def ipic_help_ctags(args): @@ -156,6 +172,8 @@ def help(args): command = deque.popleft(args) if command == "mic": ipic_help_mic(args) + elif command == "deep": + ipic_help_deep(args) elif command == "ctags": ipic_help_ctags(args) elif command == "git": From 179b9a56d21d1c2b45b6cd3d8239281eb1894796 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 7 Oct 2013 12:03:25 +0200 Subject: [PATCH 049/118] added support for single-precision particles --- include/Particles.h | 3 ++ particles/Particles3D.cpp | 90 +++++++++++++++++++-------------------- 2 files changed, 48 insertions(+), 45 deletions(-) diff --git a/include/Particles.h b/include/Particles.h index e8a903da..f5420a75 100644 --- a/include/Particles.h +++ b/include/Particles.h @@ -21,6 +21,9 @@ developers: Stefano Markidis, Giovanni Lapenta * */ +// precision to use for particles +//typedef float pfloat; +typedef double pfloat; class Particles { public: diff --git a/particles/Particles3D.cpp 
b/particles/Particles3D.cpp index 469fff44..76e92f08 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -333,24 +333,24 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { #pragma simd // this just slows things down (why?) for (int rest = 0; rest < nop; rest++) { // copy the particle - double xp = x[rest]; - double yp = y[rest]; - double zp = z[rest]; - double up = u[rest]; - double vp = v[rest]; - double wp = w[rest]; - const double xptilde = x[rest]; - const double yptilde = y[rest]; - const double zptilde = z[rest]; - double uptilde; - double vptilde; - double wptilde; + pfloat xp = x[rest]; + pfloat yp = y[rest]; + pfloat zp = z[rest]; + pfloat up = u[rest]; + pfloat vp = v[rest]; + pfloat wp = w[rest]; + const pfloat xptilde = x[rest]; + const pfloat yptilde = y[rest]; + const pfloat zptilde = z[rest]; + pfloat uptilde; + pfloat vptilde; + pfloat wptilde; // calculate the average velocity iteratively for (int innter = 0; innter < 1; innter++) { // interpolation G-->P - const double ixd = floor((xp - xstart) * inv_dx); - const double iyd = floor((yp - ystart) * inv_dy); - const double izd = floor((zp - zstart) * inv_dz); + const pfloat ixd = floor((xp - xstart) * inv_dx); + const pfloat iyd = floor((yp - ystart) * inv_dy); + const pfloat izd = floor((zp - zstart) * inv_dz); int ix = 2 + int (ixd); int iy = 2 + int (iyd); int iz = 2 + int (izd); @@ -367,9 +367,9 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { if (iz > nzn - 1) iz = nzn - 1; - double xi[2]; - double eta[2]; - double zeta[2]; + pfloat xi[2]; + pfloat eta[2]; + pfloat zeta[2]; xi[0] = xp - grid->getXN(ix-1); eta[0] = yp - grid->getYN(iy-1); zeta[0] = zp - grid->getZN(iz-1); @@ -377,16 +377,16 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { eta[1] = grid->getYN(iy) - yp; zeta[1] = grid->getZN(iz) - zp; - double Exl = 0.0; - double Eyl = 0.0; - double Ezl = 0.0; - double Bxl = 0.0; 
- double Byl = 0.0; - double Bzl = 0.0; + pfloat Exl = 0.0; + pfloat Eyl = 0.0; + pfloat Ezl = 0.0; + pfloat Bxl = 0.0; + pfloat Byl = 0.0; + pfloat Bzl = 0.0; // MIC refuses to vectorize this ... // - // double weight[2][2][2]; + // pfloat weight[2][2][2]; // for (int ii = 0; ii < 2; ii++) // for (int jj = 0; jj < 2; jj++) // for (int kk = 0; kk < 2; kk++) @@ -394,12 +394,12 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { // for (int ii = 0; ii < 2; ii++) // for (int jj = 0; jj < 2; jj++) // for (int kk = 0; kk < 2; kk++) { - // const double Exlp = weight[ii][jj][kk] * Ex.get(ix - ii, iy - jj, iz - kk); - // const double Eylp = weight[ii][jj][kk] * Ey.get(ix - ii, iy - jj, iz - kk); - // const double Ezlp = weight[ii][jj][kk] * Ez.get(ix - ii, iy - jj, iz - kk); - // const double Bxlp = weight[ii][jj][kk] * Bx.get(ix - ii, iy - jj, iz - kk); - // const double Bylp = weight[ii][jj][kk] * By.get(ix - ii, iy - jj, iz - kk); - // const double Bzlp = weight[ii][jj][kk] * Bz.get(ix - ii, iy - jj, iz - kk); + // const pfloat Exlp = weight[ii][jj][kk] * Ex.get(ix - ii, iy - jj, iz - kk); + // const pfloat Eylp = weight[ii][jj][kk] * Ey.get(ix - ii, iy - jj, iz - kk); + // const pfloat Ezlp = weight[ii][jj][kk] * Ez.get(ix - ii, iy - jj, iz - kk); + // const pfloat Bxlp = weight[ii][jj][kk] * Bx.get(ix - ii, iy - jj, iz - kk); + // const pfloat Bylp = weight[ii][jj][kk] * By.get(ix - ii, iy - jj, iz - kk); + // const pfloat Bzlp = weight[ii][jj][kk] * Bz.get(ix - ii, iy - jj, iz - kk); // Exl += Exlp; // Eyl += Eylp; // Ezl += Ezlp; @@ -410,14 +410,14 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { // ... 
so we expand things out instead // - const double weight000 = xi[0] * eta[0] * zeta[0] * invVOL; - const double weight001 = xi[0] * eta[0] * zeta[1] * invVOL; - const double weight010 = xi[0] * eta[1] * zeta[0] * invVOL; - const double weight011 = xi[0] * eta[1] * zeta[1] * invVOL; - const double weight100 = xi[1] * eta[0] * zeta[0] * invVOL; - const double weight101 = xi[1] * eta[0] * zeta[1] * invVOL; - const double weight110 = xi[1] * eta[1] * zeta[0] * invVOL; - const double weight111 = xi[1] * eta[1] * zeta[1] * invVOL; + const pfloat weight000 = xi[0] * eta[0] * zeta[0] * invVOL; + const pfloat weight001 = xi[0] * eta[0] * zeta[1] * invVOL; + const pfloat weight010 = xi[0] * eta[1] * zeta[0] * invVOL; + const pfloat weight011 = xi[0] * eta[1] * zeta[1] * invVOL; + const pfloat weight100 = xi[1] * eta[0] * zeta[0] * invVOL; + const pfloat weight101 = xi[1] * eta[0] * zeta[1] * invVOL; + const pfloat weight110 = xi[1] * eta[1] * zeta[0] * invVOL; + const pfloat weight111 = xi[1] * eta[1] * zeta[1] * invVOL; // Bxl += weight000 * Bx[ix][iy][iz]; Bxl += weight001 * Bx[ix][iy][iz - 1]; @@ -474,13 +474,13 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { Ezl += weight111 * Ez[ix - 1][iy - 1][iz - 1]; // end interpolation - const double omdtsq = qomdt2 * qomdt2 * (Bxl * Bxl + Byl * Byl + Bzl * Bzl); - const double denom = 1.0 / (1.0 + omdtsq); + const pfloat omdtsq = qomdt2 * qomdt2 * (Bxl * Bxl + Byl * Byl + Bzl * Bzl); + const pfloat denom = 1.0 / (1.0 + omdtsq); // solve the position equation - const double ut = up + qomdt2 * Exl; - const double vt = vp + qomdt2 * Eyl; - const double wt = wp + qomdt2 * Ezl; - const double udotb = ut * Bxl + vt * Byl + wt * Bzl; + const pfloat ut = up + qomdt2 * Exl; + const pfloat vt = vp + qomdt2 * Eyl; + const pfloat wt = wp + qomdt2 * Ezl; + const pfloat udotb = ut * Bxl + vt * Byl + wt * Bzl; // solve the velocity equation uptilde = (ut + qomdt2 * (vt * Bzl - wt * Byl + qomdt2 * udotb * Bxl)) * 
denom; vptilde = (vt + qomdt2 * (wt * Bxl - ut * Bzl + qomdt2 * udotb * Byl)) * denom; From 5da183b05da30f7215653188b9a73f820dc9e050 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 8 Oct 2013 10:45:24 +0200 Subject: [PATCH 050/118] issue #52: use separate fieldForPcls array to push particles --- communication/ComNodes3D.cpp | 1 + fields/EMfields3D.cpp | 28 +++++- grids/Grid3DCU.cpp | 6 ++ include/EMfields3D.h | 8 ++ include/Grid3DCU.h | 8 +- include/Particles.h | 4 - include/arraysfwd.h | 5 + include/ipicdefs.h | 19 ++++ main/iPic3Dlib.cpp | 4 + particles/Particles3D.cpp | 172 +++++++++++++++++++++++++++++++---- 10 files changed, 230 insertions(+), 25 deletions(-) diff --git a/communication/ComNodes3D.cpp b/communication/ComNodes3D.cpp index 73906404..0d97ce89 100644 --- a/communication/ComNodes3D.cpp +++ b/communication/ComNodes3D.cpp @@ -1,4 +1,5 @@ +#include "mpi.h" #include "ComNodes3D.h" #include "TimeTasks.h" #include "ipicdefs.h" diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index d37a90fb..9856e3a8 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -52,6 +52,7 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : // // array allocation: nodes // + fieldForPcls (nxn, nyn, nzn, 6), Ex (nxn, nyn, nzn), Ey (nxn, nyn, nzn), Ez (nxn, nyn, nzn), @@ -226,9 +227,8 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop double const*const q = pcls.getQall(); // const int is = pcls.get_ns(); - bool bmoments10 = true; - // if b10moments + #ifdef TENMOMENTS double* rhons1d = &rhons[is][0][0][0]; double* Jxs1d = &Jxs [is][0][0][0]; double* Jys1d = &Jys [is][0][0][0]; @@ -239,6 +239,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop double* pYYsn1d = &pYYsn[is][0][0][0]; double* pYZsn1d = &pYZsn[is][0][0][0]; double* pZZsn1d = &pZZsn[is][0][0][0]; + #endif // const long long nop_ll = pcls.getNOP(); const int nop = pcls.getNOP(); @@ -325,7 +326,7 @@ void 
EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop const double weight110 = qi * xi1 * eta1 * zeta0 * invVOL; const double weight111 = qi * xi1 * eta1 * zeta1 * invVOL; - if(bmoments10) + // add particle to moments { arr1_double_fetch moments000 = moments[ix ][iy ][iz ]; arr1_double_fetch moments001 = moments[ix ][iy ][iz-1]; @@ -1375,6 +1376,27 @@ void EMfields3D::ConstantChargePlanet(Grid * grid, VirtualTopology3D * vct, doub } +/*! Populate the field data used to push particles */ +// +// One could add a background magnetic field B_ext at this point, +// which was incompletely implemented in commit 05082fc8ad688 +// +void EMfields3D::set_fieldForPcls() +{ + #pragma omp parallel for collapse(3) + for(int i=0;igetCartesian_rank() == 0) diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index 5e7c3e2b..fa6bdd49 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -50,12 +50,18 @@ Grid3DCU::Grid3DCU(CollectiveIO * col, VirtualTopology3D * vct) { zEnd = zStart + (col->getLz() / (double) vct->getZLEN()); // arrays allocation: nodes ---> the first node has index 1, the last has index nxn-2! + pfloat_node_xcoord = new pfloat[nxn]; + pfloat_node_ycoord = new pfloat[nyn]; + pfloat_node_zcoord = new pfloat[nzn]; node_xcoord = new double[nxn]; node_ycoord = new double[nyn]; node_zcoord = new double[nzn]; for (int i=0; i the first cell has index 1, the last has index ncn-2! center_xcoord = new double[nxc]; center_ycoord = new double[nyc]; diff --git a/include/EMfields3D.h b/include/EMfields3D.h index 9ce5f215..38c62018 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -118,6 +118,8 @@ class EMfields3D // :public Field /*! smooth the electric field */ void smoothE(double value, VirtualTopology3D * vct, Collective *col); + /*! copy the field data to the array used to move the particles */ + void set_fieldForPcls(); /*! 
communicate ghost for grid -> Particles interpolation */ void communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, VirtualTopology3D * vct); void sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); @@ -187,6 +189,7 @@ class EMfields3D // :public Field double getBy(int X, int Y, int Z) const { return Byn.get(X,Y,Z);} double getBz(int X, int Y, int Z) const { return Bzn.get(X,Y,Z);} // + const_arr4_pfloat get_fieldForPcls() { return fieldForPcls; } arr3_double getEx() { return Ex; } arr3_double getEy() { return Ey; } arr3_double getEz() { return Ez; } @@ -339,6 +342,11 @@ class EMfields3D // :public Field /*! PHI: electric potential (indexX, indexY, indexZ), defined on central points between nodes */ array3_double PHI; + // Electric field component used to move particles + // organized for rapid access in mover_PC() + // [This is the information transferred from cluster to booster]. + array4_pfloat fieldForPcls; + // Electric field components defined on nodes // array3_double Ex; diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index 2754ec95..b7eb7a6b 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -7,8 +7,6 @@ #ifndef GRID3DCU_H #define GRID3DCU_H -#include - #include "Grid.h" #include "CollectiveIO.h" #include "ComInterpNodes3D.h" @@ -142,6 +140,9 @@ class Grid3DCU // :public Grid /** invol = inverse of volume*/ double invVOL; /** node coordinate */ + pfloat *pfloat_node_xcoord; + pfloat *pfloat_node_ycoord; + pfloat *pfloat_node_zcoord; double *node_xcoord; double *node_ycoord; double *node_zcoord; @@ -169,6 +170,9 @@ class Grid3DCU // :public Grid //const double &calcXN(int X) { return xStart+(X-1)*dx;} //const double &calcYN(int Y) { return yStart+(Y-1)*dy;} //const double &calcZN(int Z) { return zStart+(Z-1)*dz;} + const pfloat &get_pfloat_XN(int X) { return pfloat_node_xcoord[X];} + const pfloat &get_pfloat_YN(int Y) { return pfloat_node_ycoord[Y];} + const pfloat 
&get_pfloat_ZN(int Z) { return pfloat_node_zcoord[Z];} const double &getXN(int X) { return node_xcoord[X];} const double &getYN(int Y) { return node_ycoord[Y];} const double &getZN(int Z) { return node_zcoord[Z];} diff --git a/include/Particles.h b/include/Particles.h index f5420a75..be97ac28 100644 --- a/include/Particles.h +++ b/include/Particles.h @@ -21,10 +21,6 @@ developers: Stefano Markidis, Giovanni Lapenta * */ -// precision to use for particles -//typedef float pfloat; -typedef double pfloat; - class Particles { public: /** allocate particles */ diff --git a/include/arraysfwd.h b/include/arraysfwd.h index 9341a381..30bda425 100644 --- a/include/arraysfwd.h +++ b/include/arraysfwd.h @@ -1,6 +1,7 @@ /* forward declaration for array classes */ #ifndef arraysfwd_h #define arraysfwd_h +#include "ipicdefs.h" // for pfloat namespace iPic3D { @@ -41,6 +42,7 @@ namespace iPic3D // typedef iPic3D::const_array_ref3 const_arr3_double; typedef iPic3D::const_array_ref4 const_arr4_double; +typedef iPic3D::const_array_ref4 const_arr4_pfloat; typedef iPic3D::array_ref1 arr1_double; typedef iPic3D::array_ref2 arr2_double; typedef iPic3D::array_ref3 arr3_double; @@ -49,13 +51,16 @@ typedef iPic3D::array1 array1_double; typedef iPic3D::array2 array2_double; typedef iPic3D::array3 array3_double; typedef iPic3D::array4 array4_double; +typedef iPic3D::array4 array4_pfloat; // This directive should be consistent with the directives in Alloc.h #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) typedef iPic3D::array_fetch1 arr1_double_fetch; typedef iPic3D::array_get1 arr1_double_get; +typedef iPic3D::array_get1 arr1_pfloat_get; #else typedef double* arr1_double_fetch; typedef double* arr1_double_get; +typedef pfloat* arr1_pfloat_get; #endif #endif diff --git a/include/ipicdefs.h b/include/ipicdefs.h index ef86aaf1..1031f60c 100644 --- a/include/ipicdefs.h +++ b/include/ipicdefs.h @@ -10,4 +10,23 @@ // use precprocessor to remove MPI_Barrier() calls. #define MPI_Barrier(args...) 
+//#define SINGLE_PRECISION_PCLS +// +// single precision does not seem to help on the MIC +#ifdef SINGLE_PRECISION_PCLS + typedef float pfloat; + #ifdef __MIC__ + #define VECTOR_WIDTH 16 + #else + #define VECTOR_WIDTH 8 + #endif +#else + #ifdef __MIC__ + #define VECTOR_WIDTH 8 + #else + #define VECTOR_WIDTH 4 + #endif + typedef double pfloat; +#endif + #endif diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index d467dc8d..1cc76f78 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -218,9 +218,13 @@ bool c_Solver::ParticlesMover() { /* -------------- */ timeTasks.start(TimeTasks::PARTICLES); + // Should change this to add background guide field + EMf->set_fieldForPcls(); for (int i = 0; i < ns; i++) // move each species { // #pragma omp task inout(part[i]) in(grid) target_device(booster) + // + // should merely pass EMf->get_fieldForPcls() rather than EMf. mem_avail = part[i].mover_PC(grid, vct, EMf); // use the Predictor Corrector scheme } timeTasks.end(TimeTasks::PARTICLES); diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 76e92f08..7bdf7162 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -316,32 +316,50 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; } double start_mover_PC = MPI_Wtime(); + #if 0 const_arr3_double Ex = EMf->getEx(); const_arr3_double Ey = EMf->getEy(); const_arr3_double Ez = EMf->getEz(); const_arr3_double Bx = EMf->getBx(); const_arr3_double By = EMf->getBy(); const_arr3_double Bz = EMf->getBz(); + #endif + const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); - const double dto2 = .5 * dt, qomdt2 = qom * dto2 / c; - const double inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; + #if 0 + for(int i=0;igetXN(ix-1); - eta[0] = yp - grid->getYN(iy-1); - zeta[0] = zp - grid->getZN(iz-1); - xi[1] = grid->getXN(ix) - xp; - eta[1] = grid->getYN(iy) - yp; - 
zeta[1] = grid->getZN(iz) - zp; + xi[0] = xp - grid->get_pfloat_XN(ix-1); + eta[0] = yp - grid->get_pfloat_YN(iy-1); + zeta[0] = zp - grid->get_pfloat_ZN(iz-1); + xi[1] = grid->get_pfloat_XN(ix) - xp; + eta[1] = grid->get_pfloat_YN(iy) - yp; + zeta[1] = grid->get_pfloat_ZN(iz) - zp; pfloat Exl = 0.0; pfloat Eyl = 0.0; @@ -418,7 +436,128 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { const pfloat weight101 = xi[1] * eta[0] * zeta[1] * invVOL; const pfloat weight110 = xi[1] * eta[1] * zeta[0] * invVOL; const pfloat weight111 = xi[1] * eta[1] * zeta[1] * invVOL; + + // creating these aliases seems to accelerate this method by about 30% + // on the Xeon host, processor, suggesting deficiency in the optimizer. + // + arr1_pfloat_get field000 = fieldForPcls[ix ][iy ][iz ]; + arr1_pfloat_get field001 = fieldForPcls[ix ][iy ][iz-1]; + arr1_pfloat_get field010 = fieldForPcls[ix ][iy-1][iz ]; + arr1_pfloat_get field011 = fieldForPcls[ix ][iy-1][iz-1]; + arr1_pfloat_get field100 = fieldForPcls[ix-1][iy ][iz ]; + arr1_pfloat_get field101 = fieldForPcls[ix-1][iy ][iz-1]; + arr1_pfloat_get field110 = fieldForPcls[ix-1][iy-1][iz ]; + arr1_pfloat_get field111 = fieldForPcls[ix-1][iy-1][iz-1]; // + #if 0 // (takes same time as other order) + Bxl += weight000 * field000[0]; + Bxl += weight001 * field001[0]; + Bxl += weight010 * field010[0]; + Bxl += weight011 * field011[0]; + Bxl += weight100 * field100[0]; + Bxl += weight101 * field101[0]; + Bxl += weight110 * field110[0]; + Bxl += weight111 * field111[0]; + Byl += weight000 * field000[1]; + Byl += weight001 * field001[1]; + Byl += weight010 * field010[1]; + Byl += weight011 * field011[1]; + Byl += weight100 * field100[1]; + Byl += weight101 * field101[1]; + Byl += weight110 * field110[1]; + Byl += weight111 * field111[1]; + Bzl += weight000 * field000[2]; + Bzl += weight001 * field001[2]; + Bzl += weight010 * field010[2]; + Bzl += weight011 * field011[2]; + Bzl += weight100 * field100[2]; + Bzl 
+= weight101 * field101[2]; + Bzl += weight110 * field110[2]; + Bzl += weight111 * field111[2]; + Exl += weight000 * field000[3]; + Exl += weight001 * field001[3]; + Exl += weight010 * field010[3]; + Exl += weight011 * field011[3]; + Exl += weight100 * field100[3]; + Exl += weight101 * field101[3]; + Exl += weight110 * field110[3]; + Exl += weight111 * field111[3]; + Eyl += weight000 * field000[4]; + Eyl += weight001 * field001[4]; + Eyl += weight010 * field010[4]; + Eyl += weight011 * field011[4]; + Eyl += weight100 * field100[4]; + Eyl += weight101 * field101[4]; + Eyl += weight110 * field110[4]; + Eyl += weight111 * field111[4]; + Ezl += weight000 * field000[5]; + Ezl += weight001 * field001[5]; + Ezl += weight010 * field010[5]; + Ezl += weight011 * field011[5]; + Ezl += weight100 * field100[5]; + Ezl += weight101 * field101[5]; + Ezl += weight110 * field110[5]; + Ezl += weight111 * field111[5]; + #endif + + Bxl += weight000 * field000[0]; + Byl += weight000 * field000[1]; + Bzl += weight000 * field000[2]; + Exl += weight000 * field000[3]; + Eyl += weight000 * field000[4]; + Ezl += weight000 * field000[5]; + + Bxl += weight001 * field001[0]; + Byl += weight001 * field001[1]; + Bzl += weight001 * field001[2]; + Exl += weight001 * field001[3]; + Eyl += weight001 * field001[4]; + Ezl += weight001 * field001[5]; + + Bxl += weight010 * field010[0]; + Byl += weight010 * field010[1]; + Bzl += weight010 * field010[2]; + Exl += weight010 * field010[3]; + Eyl += weight010 * field010[4]; + Ezl += weight010 * field010[5]; + + Bxl += weight011 * field011[0]; + Byl += weight011 * field011[1]; + Bzl += weight011 * field011[2]; + Exl += weight011 * field011[3]; + Eyl += weight011 * field011[4]; + Ezl += weight011 * field011[5]; + + Bxl += weight100 * field100[0]; + Byl += weight100 * field100[1]; + Bzl += weight100 * field100[2]; + Exl += weight100 * field100[3]; + Eyl += weight100 * field100[4]; + Ezl += weight100 * field100[5]; + + Bxl += weight101 * field101[0]; + Byl += 
weight101 * field101[1]; + Bzl += weight101 * field101[2]; + Exl += weight101 * field101[3]; + Eyl += weight101 * field101[4]; + Ezl += weight101 * field101[5]; + + Bxl += weight110 * field110[0]; + Byl += weight110 * field110[1]; + Bzl += weight110 * field110[2]; + Exl += weight110 * field110[3]; + Eyl += weight110 * field110[4]; + Ezl += weight110 * field110[5]; + + Bxl += weight111 * field111[0]; + Byl += weight111 * field111[1]; + Bzl += weight111 * field111[2]; + Exl += weight111 * field111[3]; + Eyl += weight111 * field111[4]; + Ezl += weight111 * field111[5]; + + #if 0 + Bxl += weight000 * Bx[ix][iy][iz]; Bxl += weight000 * Bx[ix][iy][iz]; Bxl += weight001 * Bx[ix][iy][iz - 1]; Bxl += weight010 * Bx[ix][iy - 1][iz]; @@ -472,6 +611,7 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { Ezl += weight101 * Ez[ix - 1][iy][iz - 1]; Ezl += weight110 * Ez[ix - 1][iy - 1][iz]; Ezl += weight111 * Ez[ix - 1][iy - 1][iz - 1]; + #endif // end interpolation const pfloat omdtsq = qomdt2 * qomdt2 * (Bxl * Bxl + Byl * Byl + Bzl * Bzl); From 379a03070fb0bebf3dd447abb50238e296ed8e8c Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 8 Oct 2013 13:07:13 +0200 Subject: [PATCH 051/118] commented out #pragma simd directive: was only hurting performance --- particles/Particles3D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 7bdf7162..356bf4a3 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -348,7 +348,7 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { // to do it by hand only hurts performance. #pragma omp parallel for // why does single precision make no difference in execution speed? 
- #pragma simd vectorlength(VECTOR_WIDTH) + //#pragma simd vectorlength(VECTOR_WIDTH) for (int rest = 0; rest < nop; rest++) { // copy the particle const pfloat xptilde = x[rest]; From 93b043f50937a974b12d139efb882b066dddc5cf Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 8 Oct 2013 15:15:12 +0200 Subject: [PATCH 052/118] Plugged memory leak in c_Solver::WriteConserved --- main/iPic3Dlib.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 1cc76f78..58e3fd44 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -316,6 +316,7 @@ void c_Solver::WriteConserved(int cycle) { my_file << endl; my_file.close(); } + delete [] VelocityDist; } } } From 09ed53e97ae1de6f0233a25bd59b347a9c1b3fa5 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 8 Oct 2013 15:17:00 +0200 Subject: [PATCH 053/118] issue #53: use long long for pclId, not arr index --- fields/EMfields3D.cpp | 2 - include/Collective.h | 8 ++-- include/Particles.h | 22 +++++------ include/Particles3Dcomm.h | 42 ++++++++++---------- include/iPic3D.h | 2 +- inputoutput/Collective.cpp | 15 ++++--- main/iPic3Dlib.cpp | 2 +- particles/Particles3D.cpp | 73 +++++++++++++++++------------------ particles/Particles3Dcomm.cpp | 69 +++++++++++++++++---------------- 9 files changed, 119 insertions(+), 116 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 9856e3a8..71872157 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -241,9 +241,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop double* pZZsn1d = &pZZsn[is][0][0][0]; #endif // - const long long nop_ll = pcls.getNOP(); const int nop = pcls.getNOP(); - assert_le(nop_ll, (long long) INT_MAX); // else would need to use long long // To make memory use scale to a large number of threads, we // could first apply an efficient parallel sorting algorithm // to the particles and then accumulate moments in smaller diff --git a/include/Collective.h 
b/include/Collective.h index e4155bcf..415baa98 100644 --- a/include/Collective.h +++ b/include/Collective.h @@ -81,8 +81,8 @@ class Collective int getNpcelx(int nspecies)const{ return (npcelx[nspecies]); } int getNpcely(int nspecies)const{ return (npcely[nspecies]); } int getNpcelz(int nspecies)const{ return (npcelz[nspecies]); } - long getNp(int nspecies)const{ return (np[nspecies]); } - long getNpMax(int nspecies)const{ return (npMax[nspecies]); } + int getNp(int nspecies)const{ return (np[nspecies]); } + int getNpMax(int nspecies)const{ return (npMax[nspecies]); } double getNpMaxNpRatio()const{ return (NpMaxNpRatio); } double getQOM(int nspecies)const{ return (qom[nspecies]); } double getRHOinit(int nspecies)const{ return (rhoINIT[nspecies]); } @@ -206,9 +206,9 @@ class Collective /*! number of particles per cell - Z direction */ int *npcelz; /*! number of particles array for different species */ - long *np; + int *np; /*! maximum number of particles array for different species */ - long *npMax; + int *npMax; /*! max number of particles */ double NpMaxNpRatio; /*! 
charge to mass ratio array for different species */ diff --git a/include/Particles.h b/include/Particles.h index be97ac28..cca553c0 100644 --- a/include/Particles.h +++ b/include/Particles.h @@ -42,33 +42,33 @@ class Particles { /** get w (Z-velocity) array for all the particles */ virtual double *getWall() const = 0; /** get ID array for all the particles */ - virtual unsigned long *getParticleIDall() const = 0; + virtual long long *getParticleIDall() const = 0; /**get charge of particle array */ virtual double *getQall() const = 0; /** get X-position of particle with label indexPart */ - virtual double getX(long long indexPart) const = 0; + virtual double getX(int indexPart) const = 0; /** get Y-position of particle with label indexPart */ - virtual double getY(long long indexPart) const = 0; + virtual double getY(int indexPart) const = 0; /** get Z-position of particle with label indexPart */ - virtual double getZ(long long indexPart) const = 0; + virtual double getZ(int indexPart) const = 0; /** get u (X-velocity) of particle with label indexPart */ - virtual double getU(long long indexPart) const = 0; + virtual double getU(int indexPart) const = 0; /** get v (Y-velocity) of particle with label indexPart */ - virtual double getV(long long indexPart) const = 0; + virtual double getV(int indexPart) const = 0; /** get w (Z-velocity) of particle with label indexPart */ - virtual double getW(long long indexPart) const = 0; + virtual double getW(int indexPart) const = 0; /** get ID of particle with label indexPart */ - virtual unsigned long getParticleID(long long indexPart) const = 0; + virtual long long getParticleID(int indexPart) const = 0; /**get charge of particle with label indexPart */ - virtual double getQ(long long indexPart) const = 0; + virtual double getQ(int indexPart) const = 0; /** get the number of particles of this subdomain */ - virtual long long getNOP() const = 0; + virtual int getNOP() const = 0; /** return the Kinetic energy */ virtual double 
getKe() = 0; /** return the maximum kinetic energy */ virtual double getMaxVelocity() = 0; /** return energy distribution*/ - virtual unsigned long *getVelocityDistribution(int nBins, double maxVel) = 0; + virtual long long *getVelocityDistribution(int nBins, double maxVel) = 0; /** retturn the momentum */ virtual double getP() = 0; /** Print particles info: positions, velocities */ diff --git a/include/Particles3Dcomm.h b/include/Particles3Dcomm.h index 29600957..1e646681 100644 --- a/include/Particles3Dcomm.h +++ b/include/Particles3Dcomm.h @@ -32,19 +32,19 @@ class Particles3Dcomm:public Particles { /** method for communicating exiting particles to X-RIGHT, X-LEFT, Y-RIGHT, Y-LEFT, Z-RIGHT, Z-LEFT processes */ int communicate(VirtualTopology3D * ptVCT); /** put a particle exiting to X-LEFT in the bufferXLEFT for communication and check if you're sending the particle to the right subdomain*/ - void bufferXleft(double *b_, long long np, VirtualTopology3D * vct); + void bufferXleft(double *b_, int np, VirtualTopology3D * vct); /** put a particle exiting to X-RIGHT in the bufferXRIGHT for communication and check if you're sending the particle to the right subdomain*/ - void bufferXright(double *b_, long long np, VirtualTopology3D * vct); + void bufferXright(double *b_, int np, VirtualTopology3D * vct); /** put a particle exiting to Y-LEFT in the bufferYLEFT for communication and check if you're sending the particle to the right subdomain*/ - void bufferYleft(double *b_, long long np, VirtualTopology3D * vct); + void bufferYleft(double *b_, int np, VirtualTopology3D * vct); /** put a particle exiting to Y-RIGHT in the bufferYRIGHT for communication and check if you're sending the particle to the right subdomain*/ - void bufferYright(double *b_, long long np, VirtualTopology3D * vct); + void bufferYright(double *b_, int np, VirtualTopology3D * vct); /** put a particle exiting to Z-LEFT in the bufferZLEFT for communication and check if you're sending the particle to 
the right subdomain*/ - void bufferZleft(double *b_, long long np, VirtualTopology3D * vct); + void bufferZleft(double *b_, int np, VirtualTopology3D * vct); /** put a particle exiting to Z-RIGHT in the bufferZRIGHT for communication and check if you're sending the particle to the right subdomain*/ - void bufferZright(double *b_, long long np, VirtualTopology3D * vct); + void bufferZright(double *b_, int np, VirtualTopology3D * vct); /** Delete the a particle from a list(array) and pack the list(array) */ - void del_pack(long long np, long long *nplast); + void del_pack(int np, int *nplast); /** method to debuild the buffer received */ int unbuffer(double *b_); @@ -70,33 +70,33 @@ class Particles3Dcomm:public Particles { /** get w (Z-velocity) array for all the particles */ double *getWall() const; /** get the ID array */ - unsigned long *getParticleIDall() const; + long long *getParticleIDall() const; /** get X-position of particle with label indexPart */ - double getX(long long indexPart) const; + double getX(int indexPart) const; /** get Y-position of particle with label indexPart */ - double getY(long long indexPart) const; + double getY(int indexPart) const; /** get Z-position of particle with label indexPart */ - double getZ(long long indexPart) const; + double getZ(int indexPart) const; /** get u (X-velocity) of particle with label indexPart */ - double getU(long long indexPart) const; + double getU(int indexPart) const; /** get v (Y-velocity) of particle with label indexPart */ - double getV(long long indexPart) const; + double getV(int indexPart) const; /** get w (Z-velocity) of particle with label indexPart */ - double getW(long long indexPart) const; + double getW(int indexPart) const; /** get ID of particle with label indexPart */ - unsigned long getParticleID(long long indexPart) const; + long long getParticleID(int indexPart) const; /**get charge of particle with label indexPart */ - double getQ(long long indexPart) const; + double getQ(int indexPart) 
const; /** get charge of array for ID particles */ double *getQall() const; /** get the number of particles of this subdomain */ - long long getNOP() const; + int getNOP() const; /** return the Kinetic energy */ double getKe(); /** return the maximum kinetic energy */ double getMaxVelocity(); /** return energy distribution */ - unsigned long *getVelocityDistribution(int nBins, double maxVel); + long long *getVelocityDistribution(int nBins, double maxVel); /** return the momentum */ double getP(); /** Print particles info: positions, velocities */ @@ -112,9 +112,9 @@ class Particles3Dcomm:public Particles { /** number of this species */ int ns; /** maximum number of particles of this species on this domain. used for memory allocation */ - long long npmax; + int npmax; /** number of particles of this species on this domain */ - long long nop; + int nop; /** total number of particles */ long long np_tot; /** number of particles per cell */ @@ -156,7 +156,7 @@ class Particles3Dcomm:public Particles { /** TrackParticleID */ bool TrackParticleID; /** ParticleID */ - unsigned long *ParticleID; + long long *ParticleID; /** rank of processor in which particle is created (for ID) */ int BirthRank[2]; /** number of variables to be stored in buffer for communication for each particle */ diff --git a/include/iPic3D.h b/include/iPic3D.h index 440fdf02..67a72e1a 100644 --- a/include/iPic3D.h +++ b/include/iPic3D.h @@ -55,7 +55,7 @@ namespace iPic3D { double *Ke; double *momentum; double *Qremoved; - unsigned long *VelocityDist; + long long *VelocityDist; Timing *my_clock; PSK::OutputManager < PSK::OutputAdaptor > output_mgr; // Create an Output Manager diff --git a/inputoutput/Collective.cpp b/inputoutput/Collective.cpp index da721b76..708be195 100644 --- a/inputoutput/Collective.cpp +++ b/inputoutput/Collective.cpp @@ -2,6 +2,8 @@ #include #include "Collective.h" #include "debug.h" +#include "limits.h" // for INT_MAX +#include "asserts.h" // for assert_ge /*! 
Read the input file from text file and put the data in a collective wrapper: if it's a restart read from input file basic sim data and load particles and EM field from restart file */ void Collective::ReadInput(string inputfile) { @@ -28,6 +30,7 @@ void Collective::ReadInput(string inputfile) { RestartDirName = config.read < string > ("RestartDirName"); ns = config.read < int >("ns"); NpMaxNpRatio = config.read < double >("NpMaxNpRatio"); + assert_ge(NpMaxNpRatio, 1.); // GEM Challenge B0x = config.read ("B0x"); B0y = config.read ("B0y"); @@ -576,18 +579,20 @@ Collective::Collective(int argc, char **argv) { /*! npcel = number of particles per cell */ npcel = new int[ns]; /*! np = number of particles of different species */ - np = new long[ns]; + np = new int[ns]; /*! npMax = maximum number of particles of different species */ - npMax = new long[ns]; + npMax = new int[ns]; for (int i = 0; i < ns; i++) { npcel[i] = npcelx[i] * npcely[i] * npcelz[i]; np[i] = npcel[i] * nxc * nyc * nzc; - npMax[i] = (long) (NpMaxNpRatio * np[i]); + double npMaxi = (NpMaxNpRatio * np[i]); + // INT_MAX is about 2 billions, surely enough + // to index the particles in a single MPI process + assert_le(npMaxi, double(INT_MAX)); + npMax[i] = (int) npMaxi; } - - } /*! 
destructor */ diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 58e3fd44..7a0dd291 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -145,7 +145,7 @@ int c_Solver::Init(int argc, char **argv) { } // Distribution functions nDistributionBins = 1000; - VelocityDist = new unsigned long[nDistributionBins]; + long long *VelocityDist = new long long[nDistributionBins]; ds = SaveDirName + "/DistributionFunctions.txt"; if (myrank == 0) { ofstream my_file(ds.c_str()); diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 356bf4a3..ecb1dab7 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -67,7 +67,7 @@ Particles3D::~Particles3D() { /** particles are uniformly distributed with zero velocity */ void Particles3D::uniform_background(Grid * grid, Field * EMf) { - long long counter = 0; + int counter = 0; for (int i = 1; i < grid->getNXC() - 1; i++) for (int j = 1; j < grid->getNYC() - 1; j++) for (int k = 1; k < grid->getNZC() - 1; k++) @@ -82,7 +82,7 @@ void Particles3D::uniform_background(Grid * grid, Field * EMf) { w[counter] = 0.0; q[counter] = (qom / fabs(qom)) * (EMf->getRHOcs(i, j, k, ns) / npcel) * (1.0 / grid->getInvVOL()); if (TrackParticleID) - ParticleID[counter] = counter * (unsigned long) pow(10.0, BirthRank[1]) + BirthRank[0]; + ParticleID[counter] = counter * (long long) pow(10.0, BirthRank[1]) + BirthRank[0]; counter++; } @@ -100,15 +100,15 @@ void Particles3D::uniform_background(Grid * grid, Field * EMf) { void Particles3D::constantVelocity(double vel, int dim, Grid * grid, Field * EMf) { switch (dim) { case 0: - for (long long i = 0; i < nop; i++) + for (int i = 0; i < nop; i++) u[i] = vel, v[i] = 0.0, w[i] = 0.0; break; case 1: - for (register long long i = 0; i < nop; i++) + for (int i = 0; i < nop; i++) u[i] = 0.0, v[i] = vel, w[i] = 0.0; break; case 2: - for (register long long i = 0; i < nop; i++) + for (int i = 0; i < nop; i++) u[i] = 0.0, v[i] = 0.0, w[i] = vel; break; @@ -137,7 +137,7 @@ 
void Particles3D::MaxwellianFromFluid(Grid* grid,Field* EMf,VirtualTopology3D* v MaxwellianFromFluidCell(grid,col,is, i,j,k,counter,x,y,z,q,u,v,w,ParticleID); } -void Particles3D::MaxwellianFromFluidCell(Grid* grid, Collective *col, int is, int i, int j, int k, int &ip, double *x, double *y, double *z, double *q, double *vx, double *vy, double *vz, unsigned long* ParticleID) +void Particles3D::MaxwellianFromFluidCell(Grid* grid, Collective *col, int is, int i, int j, int k, int &ip, double *x, double *y, double *z, double *q, double *vx, double *vy, double *vz, long long* ParticleID) { /* * grid : local grid object (in) @@ -179,7 +179,7 @@ void Particles3D::MaxwellianFromFluidCell(Grid* grid, Collective *col, int is, i theta = 2.0*M_PI*harvest; w[ip] = col->getFluidUz(i,j,k,is) + col->getFluidUthz(i,j,k,is)*prob*cos(theta); if (TrackParticleID) - ParticleID[ip]= ip*(unsigned long)pow(10.0,BirthRank[1])+BirthRank[0]; + ParticleID[ip]= ip*(long long)pow(10.0,BirthRank[1])+BirthRank[0]; ip++ ; } } @@ -193,7 +193,7 @@ void Particles3D::maxwellian(Grid * grid, Field * EMf, VirtualTopology3D * vct) double harvest; double prob, theta, sign; - long long counter = 0; + int counter = 0; for (int i = 1; i < grid->getNXC() - 1; i++) for (int j = 1; j < grid->getNYC() - 1; j++) for (int k = 1; k < grid->getNZC() - 1; k++) @@ -220,7 +220,7 @@ void Particles3D::maxwellian(Grid * grid, Field * EMf, VirtualTopology3D * vct) theta = 2.0 * M_PI * harvest; w[counter] = w0 + wth * prob * cos(theta); if (TrackParticleID) - ParticleID[counter] = counter * (unsigned long) pow(10.0, BirthRank[1]) + BirthRank[0]; + ParticleID[counter] = counter * (long long) pow(10.0, BirthRank[1]) + BirthRank[0]; counter++; @@ -234,7 +234,7 @@ void Particles3D::force_free(Grid * grid, Field * EMf, VirtualTopology3D * vct) double harvest, prob, theta; - long long counter = 0; + int counter = 0; double shaperx, shapery, shaperz; double flvx = 1.0, flvy = 1.0, flvz = 1.0; @@ -281,7 +281,7 @@ void 
Particles3D::force_free(Grid * grid, Field * EMf, VirtualTopology3D * vct) w[counter] = flvz + wth * prob * cos(theta); } if (TrackParticleID) - ParticleID[counter] = counter * (unsigned long) pow(10.0, BirthRank[1]) + BirthRank[0]; + ParticleID[counter] = counter * (long long) pow(10.0, BirthRank[1]) + BirthRank[0]; counter++; } @@ -297,7 +297,7 @@ void Particles3D::AddPerturbationJ(double deltaBoB, double kx, double ky, double jx_mod *= alpha; jy_mod *= alpha; jz_mod *= alpha; - for (register long long i = 0; i < nop; i++) { + for (int i = 0; i < nop; i++) { u[i] += jx_mod / q[i] / npcel / invVOL * cos(kx * x[i] + ky * y[i] + jx_phase); v[i] += jy_mod / q[i] / npcel / invVOL * cos(kx * x[i] + ky * y[i] + jy_phase); w[i] += jz_mod / q[i] / npcel / invVOL * cos(kx * x[i] + ky * y[i] + jz_phase); @@ -342,7 +342,6 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { const pfloat dto2 = .5 * dt, qomdt2 = qom * dto2 / c; const pfloat inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; - assert_le(nop,(long long) INT_MAX); // else would need to use long long // don't bother trying to push any particles simultaneously; // MIC already does vectorization automatically, and trying // to do it by hand only hurts performance. 
@@ -677,15 +676,15 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* } double FourPI =16*atan(1.0); int avail; - long long store_nop=nop; + int store_nop=nop; //////////////////////// // INJECTION FROM XLEFT //////////////////////// srand (vct->getCartesian_rank()+1+ns+(int(MPI_Wtime()))%10000); if (vct->getXleft_neighbor() == MPI_PROC_NULL && bcPfaceXleft == 2){ // use Field topology in this case - long long particles_index=0; - long long nplast = nop-1; + int particles_index=0; + int nplast = nop-1; while (particles_index < nplast+1) { if (x[particles_index] < 3.0*dx ) { @@ -729,7 +728,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* theta = 2.0*M_PI*harvest; w[particles_index] = w0 + wth*prob*cos(theta); if (TrackParticleID) - ParticleID[particles_index]= particles_index*(unsigned long)pow(10.0,BirthRank[1])+BirthRank[0]; + ParticleID[particles_index]= particles_index*(long long)pow(10.0,BirthRank[1])+BirthRank[0]; particles_index++ ; @@ -745,8 +744,8 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* srand (vct->getCartesian_rank()+1+ns+(int(MPI_Wtime()))%10000); if (vct->getYleft_neighbor() == MPI_PROC_NULL && bcPfaceYleft == 2) { - long long particles_index=0; - long long nplast = nop-1; + int particles_index=0; + int nplast = nop-1; while (particles_index < nplast+1) { if (y[particles_index] < 3.0*dy ) { del_pack(particles_index,&nplast); @@ -788,7 +787,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* theta = 2.0*M_PI*harvest; w[particles_index] = w0 + wth*prob*cos(theta); if (TrackParticleID) - ParticleID[particles_index]= particles_index*(unsigned long)pow(10.0,BirthRank[1])+BirthRank[0]; + ParticleID[particles_index]= particles_index*(long long)pow(10.0,BirthRank[1])+BirthRank[0]; particles_index++ ; } @@ -801,8 +800,8 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* srand 
(vct->getCartesian_rank()+1+ns+(int(MPI_Wtime()))%10000); if (vct->getZleft_neighbor() == MPI_PROC_NULL && bcPfaceZleft == 2) { - long long particles_index=0; - long long nplast = nop-1; + int particles_index=0; + int nplast = nop-1; while (particles_index < nplast+1) { if (z[particles_index] < 3.0*dz ) { del_pack(particles_index,&nplast); @@ -844,7 +843,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* theta = 2.0*M_PI*harvest; w[particles_index] = w0 + wth*prob*cos(theta); if (TrackParticleID) - ParticleID[particles_index]= particles_index*(unsigned long)pow(10.0,BirthRank[1])+BirthRank[0]; + ParticleID[particles_index]= particles_index*(long long)pow(10.0,BirthRank[1])+BirthRank[0]; particles_index++ ; } @@ -856,8 +855,8 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* //////////////////////// srand (vct->getCartesian_rank()+1+ns+(int(MPI_Wtime()))%10000); if (vct->getXright_neighbor() == MPI_PROC_NULL && bcPfaceXright == 2){ - long long particles_index=0; - long long nplast = nop-1; + int particles_index=0; + int nplast = nop-1; while (particles_index < nplast+1) { if (x[particles_index] > (Lx-3.0*dx) ) { del_pack(particles_index,&nplast); @@ -899,7 +898,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* theta = 2.0*M_PI*harvest; w[particles_index] = w0 + wth*prob*cos(theta); if (TrackParticleID) - ParticleID[particles_index]= particles_index*(unsigned long)pow(10.0,BirthRank[1])+BirthRank[0]; + ParticleID[particles_index]= particles_index*(long long)pow(10.0,BirthRank[1])+BirthRank[0]; particles_index++ ; } @@ -912,8 +911,8 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* srand (vct->getCartesian_rank()+1+ns+(int(MPI_Wtime()))%10000); if (vct->getYright_neighbor() == MPI_PROC_NULL && bcPfaceYright == 2) { - long long particles_index=0; - long long nplast = nop-1; + int particles_index=0; + int nplast = nop-1; while 
(particles_index < nplast+1) { if (y[particles_index] > (Ly-3.0*dy) ) { del_pack(particles_index,&nplast); @@ -955,7 +954,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* theta = 2.0*M_PI*harvest; w[particles_index] = w0 + wth*prob*cos(theta); if (TrackParticleID) - ParticleID[particles_index]= particles_index*(unsigned long)pow(10.0,BirthRank[1])+BirthRank[0]; + ParticleID[particles_index]= particles_index*(long long)pow(10.0,BirthRank[1])+BirthRank[0]; particles_index++ ; } @@ -968,8 +967,8 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* srand (vct->getCartesian_rank()+1+ns+(int(MPI_Wtime()))%10000); if (vct->getZright_neighbor() == MPI_PROC_NULL && bcPfaceZright == 2) { - long long particles_index=0; - long long nplast = nop-1; + int particles_index=0; + int nplast = nop-1; while (particles_index < nplast+1) { if (z[particles_index] > (Lz-3.0*dz) ) { del_pack(particles_index,&nplast); @@ -1011,7 +1010,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* theta = 2.0*M_PI*harvest; w[particles_index] = w0 + wth*prob*cos(theta); if (TrackParticleID) - ParticleID[particles_index]= particles_index*(unsigned long)pow(10.0,BirthRank[1])+BirthRank[0]; + ParticleID[particles_index]= particles_index*(long long)pow(10.0,BirthRank[1])+BirthRank[0]; particles_index++ ; } @@ -1046,7 +1045,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* void Particles3D::linear_perturbation(double deltaBoB, double kx, double ky, double angle, double omega_r, double omega_i, double Ex_mod, double Ex_phase, double Ey_mod, double Ey_phase, double Ez_mod, double Ez_phase, double Bx_mod, double Bx_phase, double By_mod, double By_phase, double Bz_mod, double Bz_phase, Grid * grid, Field * EMf, VirtualTopology3D * vct) { double value1 = 0.0, value2 = 0.0, max_value = 0.0, min_value = 0.0, phi, n; - long long counter = 0, total_generated = 0; + int counter = 0, 
total_generated = 0; bool rejected; double harvest, prob, theta; // rescaling of amplitudes according to deltaBoB // @@ -1125,7 +1124,7 @@ void Particles3D::linear_perturbation(double deltaBoB, double kx, double ky, dou } if (TrackParticleID) - ParticleID[counter] = counter * (unsigned long) pow(10.0, BirthRank[1]) + BirthRank[0]; + ParticleID[counter] = counter * (long long) pow(10.0, BirthRank[1]) + BirthRank[0]; counter++; } nop = counter + 1; @@ -1216,7 +1215,7 @@ double Particles3D::f0(double vpar, double vperp) { void Particles3D::RotatePlaneXY(double theta) { double temp, temp2; - for (register long long s = 0; s < nop; s++) { + for (register int s = 0; s < nop; s++) { temp = u[s]; temp2 = v[s]; u[s] = temp * cos(theta) + v[s] * sin(theta); @@ -1227,8 +1226,8 @@ void Particles3D::RotatePlaneXY(double theta) { /*! Delete the particles inside the sphere with radius R and center x_center y_center and return the total charge removed */ double Particles3D::deleteParticlesInsideSphere(double R, double x_center, double y_center, double z_center){ - long long np_current = 0; - long long nplast = nop-1; + int np_current = 0; + int nplast = nop-1; while (np_current < nplast+1){ diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 1120de1c..f1c0c5c5 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -146,7 +146,7 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 q = new double[npmax]; // ID if (TrackParticleID) { - ParticleID = new unsigned long[npmax]; + ParticleID = new long long[npmax]; BirthRank[0] = vct->getCartesian_rank(); if (vct->getNprocs() > 1) BirthRank[1] = (int) ceil(log10((double) (vct->getNprocs()))); // Number of digits needed for # of process in ID @@ -154,7 +154,7 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 BirthRank[1] = 1; if (BirthRank[1] + (int) ceil(log10((double) (npmax))) > 10 && BirthRank[0] == 0) { cerr << 
"Error: can't Track particles in Particles3Dcomm::allocate" << endl; - cerr << "Unsigned long 'ParticleID' cannot store all the particles" << endl; + cerr << "long long 'ParticleID' cannot store all the particles" << endl; return; } } @@ -271,8 +271,8 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 if (dataset_id > 0) status = H5Dread(dataset_id, H5T_NATIVE_ULONG, H5S_ALL, H5S_ALL, H5P_DEFAULT, ParticleID); else { - for (register long long counter = 0; counter < nop; counter++) - ParticleID[counter] = counter * (unsigned long) pow(10.0, BirthRank[1]) + BirthRank[0]; + for (int counter = 0; counter < nop; counter++) + ParticleID[counter] = counter * (long long) pow(10.0, BirthRank[1]) + BirthRank[0]; } } // close the hdf file @@ -291,7 +291,7 @@ void Particles3Dcomm::interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vc const double nxn = grid->getNXN(); const double nyn = grid->getNYN(); const double nzn = grid->getNZN(); - assert_le(nop,(long long)INT_MAX); // else would need to use long long + // assert_le(nop,(long long)INT_MAX); // else would need to use long long // to make memory use scale to a large number of threads we // could first apply an efficient parallel sorting algorithm // to the particles and then accumulate moments in smaller @@ -395,7 +395,7 @@ int Particles3Dcomm::communicate(VirtualTopology3D * ptVCT) { b_Z_LEFT[i] = MIN_VAL; } npExitXright = 0, npExitXleft = 0, npExitYright = 0, npExitYleft = 0, npExitZright = 0, npExitZleft = 0, npExit = 0, rightDomain = 0; - long long np_current = 0, nplast = nop - 1; + int np_current = 0, nplast = nop - 1; while (np_current < nplast+1){ @@ -647,7 +647,7 @@ void Particles3Dcomm::resize_buffers(int new_buffer_size) { buffer_size = new_buffer_size; } /** put a particle exiting to X-LEFT in the bufferXLEFT for communication and check if you're sending the particle to the right subdomain*/ -void Particles3Dcomm::bufferXleft(double *b_, long long np_current, 
VirtualTopology3D * vct) { +void Particles3Dcomm::bufferXleft(double *b_, int np_current, VirtualTopology3D * vct) { if (x[np_current] < 0) b_[npExitXleft * nVar] = x[np_current] + Lx; // this applies to the the leftmost processor else @@ -662,7 +662,7 @@ void Particles3Dcomm::bufferXleft(double *b_, long long np_current, VirtualTopol b_[npExitXleft * nVar + 7] = ParticleID[np_current]; } /** put a particle exiting to X-RIGHT in the bufferXRIGHT for communication and check if you're sending the particle to the right subdomain*/ -void Particles3Dcomm::bufferXright(double *b_, long long np_current, VirtualTopology3D * vct) { +void Particles3Dcomm::bufferXright(double *b_, int np_current, VirtualTopology3D * vct) { if (x[np_current] > Lx) b_[npExitXright * nVar] = x[np_current] - Lx; // this applies to the right most processor else @@ -677,7 +677,7 @@ void Particles3Dcomm::bufferXright(double *b_, long long np_current, VirtualTopo b_[npExitXright * nVar + 7] = ParticleID[np_current]; } /** put a particle exiting to Y-LEFT in the bufferYLEFT for communication and check if you're sending the particle to the right subdomain*/ -inline void Particles3Dcomm::bufferYleft(double *b_, long long np_current, VirtualTopology3D * vct) { +inline void Particles3Dcomm::bufferYleft(double *b_, int np_current, VirtualTopology3D * vct) { b_[npExitYleft * nVar] = x[np_current]; if (y[np_current] < 0) b_[npExitYleft * nVar + 1] = y[np_current] + Ly; @@ -692,7 +692,7 @@ inline void Particles3Dcomm::bufferYleft(double *b_, long long np_current, Virtu b_[npExitYleft * nVar + 7] = ParticleID[np_current]; } /** put a particle exiting to Y-RIGHT in the bufferYRIGHT for communication and check if you're sending the particle to the right subdomain*/ -inline void Particles3Dcomm::bufferYright(double *b_, long long np_current, VirtualTopology3D * vct) { +inline void Particles3Dcomm::bufferYright(double *b_, int np_current, VirtualTopology3D * vct) { b_[npExitYright * nVar] = x[np_current]; if 
(y[np_current] > Ly) b_[npExitYright * nVar + 1] = y[np_current] - Ly; @@ -707,7 +707,7 @@ inline void Particles3Dcomm::bufferYright(double *b_, long long np_current, Virt b_[npExitYright * nVar + 7] = ParticleID[np_current]; } /** put a particle exiting to Z-LEFT in the bufferZLEFT for communication and check if you're sending the particle to the right subdomain*/ -inline void Particles3Dcomm::bufferZleft(double *b_, long long np_current, VirtualTopology3D * vct) { +inline void Particles3Dcomm::bufferZleft(double *b_, int np_current, VirtualTopology3D * vct) { b_[npExitZleft * nVar] = x[np_current]; b_[npExitZleft * nVar + 1] = y[np_current]; if (z[np_current] < 0) @@ -722,7 +722,7 @@ inline void Particles3Dcomm::bufferZleft(double *b_, long long np_current, Virtu b_[npExitZleft * nVar + 7] = ParticleID[np_current]; } /** put a particle exiting to Z-RIGHT in the bufferZRIGHT for communication and check if you're sending the particle to the right subdomain*/ -inline void Particles3Dcomm::bufferZright(double *b_, long long np_current, VirtualTopology3D * vct) { +inline void Particles3Dcomm::bufferZright(double *b_, int np_current, VirtualTopology3D * vct) { b_[npExitZright * nVar] = x[np_current]; b_[npExitZright * nVar + 1] = y[np_current]; if (z[np_current] > Lz) @@ -738,7 +738,7 @@ inline void Particles3Dcomm::bufferZright(double *b_, long long np_current, Virt } /** This unbuffer the last communication */ int Particles3Dcomm::unbuffer(double *b_) { - long long np_current = 0; + int np_current = 0; // put the new particles at the end of the array, and update the number of particles while (b_[np_current * nVar] != MIN_VAL) { x[nop] = b_[nVar * np_current]; @@ -749,7 +749,7 @@ int Particles3Dcomm::unbuffer(double *b_) { w[nop] = b_[nVar * np_current + 5]; q[nop] = b_[nVar * np_current + 6]; if (TrackParticleID) - ParticleID[nop] = (unsigned long) b_[nVar * np_current + 7]; + ParticleID[nop] = (long long) b_[nVar * np_current + 7]; np_current++; // these particles 
need further communication if (x[nop] < xstart || x[nop] > xend || y[nop] < ystart || y[nop] > yend || z[nop] < zstart || z[nop] > zend) @@ -770,7 +770,7 @@ int Particles3Dcomm::unbuffer(double *b_) { * @param np = the index of the particle that must be deleted * @param nplast = the index of the last particle in the array */ -void Particles3Dcomm::del_pack(long long np_current, long long *nplast) { +void Particles3Dcomm::del_pack(int np_current, int *nplast) { x[np_current] = x[*nplast]; y[np_current] = y[*nplast]; z[np_current] = z[*nplast]; @@ -834,7 +834,7 @@ double *Particles3Dcomm::getWall() const { return (w); } /**get ID of particle with label indexPart */ -unsigned long *Particles3Dcomm::getParticleIDall() const { +long long *Particles3Dcomm::getParticleIDall() const { return (ParticleID); } /**get charge of particle with label indexPart */ @@ -842,45 +842,46 @@ double *Particles3Dcomm::getQall() const { return (q); } /** return X-coordinate of particle with index indexPart */ -double Particles3Dcomm::getX(long long indexPart) const { +double Particles3Dcomm::getX(int indexPart) const { return (x[indexPart]); } /** return Y-coordinate of particle with index indexPart */ -double Particles3Dcomm::getY(long long indexPart) const { +double Particles3Dcomm::getY(int indexPart) const { return (y[indexPart]); } /** return Y-coordinate of particle with index indexPart */ -double Particles3Dcomm::getZ(long long indexPart) const { +double Particles3Dcomm::getZ(int indexPart) const { return (z[indexPart]); } /** get u (X-velocity) of particle with label indexPart */ -double Particles3Dcomm::getU(long long indexPart) const { +double Particles3Dcomm::getU(int indexPart) const { return (u[indexPart]); } /** get v (Y-velocity) of particle with label indexPart */ -double Particles3Dcomm::getV(long long indexPart) const { +double Particles3Dcomm::getV(int indexPart) const { return (v[indexPart]); } /**get w (Z-velocity) of particle with label indexPart */ -double 
Particles3Dcomm::getW(long long indexPart) const { +double Particles3Dcomm::getW(int indexPart) const { return (w[indexPart]); } /**get ID of particle with label indexPart */ -unsigned long Particles3Dcomm::getParticleID(long long indexPart) const { +long long Particles3Dcomm::getParticleID(int indexPart) const { return (ParticleID[indexPart]); } /**get charge of particle with label indexPart */ -double Particles3Dcomm::getQ(long long indexPart) const { +double Particles3Dcomm::getQ(int indexPart) const { return (q[indexPart]); } -/** return the number of particles */ long long Particles3Dcomm::getNOP() const { +/** return the number of particles */ +int Particles3Dcomm::getNOP() const { return (nop); } /** return the Kinetic energy */ double Particles3Dcomm::getKe() { double localKe = 0.0; double totalKe = 0.0; - for (register long long i = 0; i < nop; i++) + for (register int i = 0; i < nop; i++) localKe += .5 * (q[i] / qom) * (u[i] * u[i] + v[i] * v[i] + w[i] * w[i]); MPI_Allreduce(&localKe, &totalKe, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); return (totalKe); @@ -889,7 +890,7 @@ double Particles3Dcomm::getKe() { double Particles3Dcomm::getP() { double localP = 0.0; double totalP = 0.0; - for (register long long i = 0; i < nop; i++) + for (register int i = 0; i < nop; i++) localP += (q[i] / qom) * sqrt(u[i] * u[i] + v[i] * v[i] + w[i] * w[i]); MPI_Allreduce(&localP, &totalP, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); return (totalP); @@ -899,7 +900,7 @@ double Particles3Dcomm::getP() { double Particles3Dcomm::getMaxVelocity() { double localVel = 0.0; double maxVel = 0.0; - for (long long i = 0; i < nop; i++) + for (int i = 0; i < nop; i++) localVel = max(localVel, sqrt(u[i] * u[i] + v[i] * v[i] + w[i] * w[i])); MPI_Allreduce(&localVel, &maxVel, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); return (maxVel); @@ -907,14 +908,14 @@ double Particles3Dcomm::getMaxVelocity() { /** get energy spectrum */ -unsigned long *Particles3Dcomm::getVelocityDistribution(int nBins, double 
maxVel) { - unsigned long *f = new unsigned long[nBins]; +long long *Particles3Dcomm::getVelocityDistribution(int nBins, double maxVel) { + long long *f = new long long[nBins]; for (int i = 0; i < nBins; i++) f[i] = 0; double Vel = 0.0; double dv = maxVel / nBins; int bin = 0; - for (long long i = 0; i < nop; i++) { + for (int i = 0; i < nop; i++) { Vel = sqrt(u[i] * u[i] + v[i] * v[i] + w[i] * w[i]); bin = int (floor(Vel / dv)); if (bin >= nBins) @@ -922,8 +923,8 @@ unsigned long *Particles3Dcomm::getVelocityDistribution(int nBins, double maxVel else f[bin] += 1; } - unsigned long localN = 0; - unsigned long totalN = 0; + long long localN = 0; + long long totalN = 0; for (int i = 0; i < nBins; i++) { localN = f[i]; MPI_Allreduce(&localN, &totalN, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); @@ -942,7 +943,7 @@ void Particles3Dcomm::Print(VirtualTopology3D * ptVCT) const { cout << "Yin = " << ystart << "; Yfin = " << yend << endl; cout << "Zin = " << zstart << "; Zfin = " << zend << endl; cout << "Number of species = " << ns << endl; - for (long long i = 0; i < nop; i++) + for (int i = 0; i < nop; i++) cout << "Particles #" << i << " x=" << x[i] << " y=" << y[i] << " z=" << z[i] << " u=" << u[i] << " v=" << v[i] << " w=" << w[i] << endl; cout << endl; } From e4ba3d54c607cb915f86e751599651f091766041 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 8 Oct 2013 15:31:41 +0200 Subject: [PATCH 054/118] made VelocityDist a local variable (follow-up to 93b043f5093) --- include/iPic3D.h | 1 - main/iPic3Dlib.cpp | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/iPic3D.h b/include/iPic3D.h index 67a72e1a..9a79c131 100644 --- a/include/iPic3D.h +++ b/include/iPic3D.h @@ -55,7 +55,6 @@ namespace iPic3D { double *Ke; double *momentum; double *Qremoved; - long long *VelocityDist; Timing *my_clock; PSK::OutputManager < PSK::OutputAdaptor > output_mgr; // Create an Output Manager diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 
7a0dd291..96216d78 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -145,7 +145,6 @@ int c_Solver::Init(int argc, char **argv) { } // Distribution functions nDistributionBins = 1000; - long long *VelocityDist = new long long[nDistributionBins]; ds = SaveDirName + "/DistributionFunctions.txt"; if (myrank == 0) { ofstream my_file(ds.c_str()); @@ -307,7 +306,7 @@ void c_Solver::WriteConserved(int cycle) { // Velocity distribution for (int is = 0; is < ns; is++) { double maxVel = part[is].getMaxVelocity(); - VelocityDist = part[is].getVelocityDistribution(nDistributionBins, maxVel); + long long *VelocityDist = part[is].getVelocityDistribution(nDistributionBins, maxVel); if (myrank == 0) { ofstream my_file(ds.c_str(), fstream::app); my_file << cycle << "\t" << is << "\t" << maxVel; From 3522db61d3d96d639df3c41417f0e025e938c55e Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 8 Oct 2013 17:29:55 +0200 Subject: [PATCH 055/118] corrected MPI_UNSIGNED_LONG_LONG to MPI_LONG_LONG in getVelocityDistribution --- particles/Particles3Dcomm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index f1c0c5c5..13a443c0 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -927,7 +927,7 @@ long long *Particles3Dcomm::getVelocityDistribution(int nBins, double maxVel) { long long totalN = 0; for (int i = 0; i < nBins; i++) { localN = f[i]; - MPI_Allreduce(&localN, &totalN, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&localN, &totalN, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); f[i] = totalN; } return f; From 90334a2f978d01cb0d16a9a69e6530aa518587bb Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 10 Oct 2013 12:17:27 +0200 Subject: [PATCH 056/118] issue #54: TimeTasks now supports non-exclusive tasks and --- communication/ComNodes3D.cpp | 50 +++----- fields/EMfields3D.cpp | 10 +- iPic3D.cpp | 7 ++ include/ComNodes3D.h | 3 - 
include/TimeTasks.h | 182 ++++++++++++++++++++++++----- include/iPic3D.h | 4 +- main/iPic3Dlib.cpp | 60 +++++----- particles/Particles3D.cpp | 3 +- utility/TimeTasks.cpp | 220 +++++++++++++++++++++-------------- 9 files changed, 343 insertions(+), 196 deletions(-) diff --git a/communication/ComNodes3D.cpp b/communication/ComNodes3D.cpp index 0d97ce89..6b0bb169 100644 --- a/communication/ComNodes3D.cpp +++ b/communication/ComNodes3D.cpp @@ -7,7 +7,7 @@ /** communicate ghost cells (FOR NODES) */ void communicateNode(int nx, int ny, int nz, arr3_double _vector, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -106,12 +106,10 @@ void communicateNode(int nx, int ny, int nz, arr3_double _vector, VirtualTopolog delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); } /** communicate ghost cells (FOR NODES) */ void communicateNodeBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector = _vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -212,12 +210,10 @@ void communicateNodeBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXr delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); } /** communicate ghost cells (FOR NODES) with particles BC*/ void communicateNodeBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double 
***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -318,13 +314,11 @@ void communicateNodeBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFace delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); } /** SPECIES: communicate ghost cells */ void communicateNode(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ****vector = _vector.fetch_arr4(); // allocate 6 ghost cell Faces @@ -422,14 +416,12 @@ void communicateNode(int nx, int ny, int nz, arr4_double _vector, int ns, Virtua delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); -} // +} // PARTICLES /** SPECIES: communicate ghost cells */ void communicateNode_P(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ****vector = _vector.fetch_arr4(); // allocate 6 ghost cell Faces @@ -527,14 +519,12 @@ void communicateNode_P(int nx, int ny, int nz, arr4_double _vector, int ns, Virt delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); } // /** communicate ghost cells (FOR CENTERS) */ void communicateCenter(int nx, int ny, int nz, arr3_double _vector, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector = _vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -631,12 +621,10 @@ void communicateCenter(int nx, int ny, int nz, arr3_double _vector, VirtualTopol delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); } /** communicate ghost cells (FOR 
CENTERS) with BOX stencil*/ void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -664,12 +652,11 @@ void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, delete[]ghostYleftFace; delete[]ghostZrightFace; delete[]ghostZleftFace; - timeTasks.addto_communicate(); } // particles /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -697,14 +684,13 @@ void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector delete[]ghostYleftFace; delete[]ghostZrightFace; delete[]ghostZleftFace; - timeTasks.addto_communicate(); } // void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -732,11 +718,10 @@ void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, in delete[]ghostYleftFace; delete[]ghostZrightFace; delete[]ghostZleftFace; - timeTasks.addto_communicate(); } void communicateNodeBoxStencilBC_P(int 
nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -764,14 +749,13 @@ void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector, delete[]ghostYleftFace; delete[]ghostZrightFace; delete[]ghostZleftFace; - timeTasks.addto_communicate(); } /** SPECIES: communicate ghost cells */ void communicateCenter(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ****vector=_vector.fetch_arr4(); // allocate 6 ghost cell Faces @@ -867,12 +851,10 @@ void communicateCenter(int nx, int ny, int nz, arr4_double _vector, int ns, Virt delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); } // /////////// communication + BC //////////////////////////// void communicateCenterBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -971,12 +953,10 @@ void communicateCenterBC(int nx, int ny, int nz, arr3_double _vector, int bcFace delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); } // /////////// communication + BC //////////////////////////// void communicateCenterBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, 
VirtualTopology3D * vct) { - timeTasks.start_communicate(); + timeTasks_set_communicating(); double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -1075,6 +1055,4 @@ void communicateCenterBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFa delete[]ghostXrightYrightZsameEdge; delete[]ghostXleftYleftZsameEdge; delete[]ghostXleftYrightZsameEdge; - - timeTasks.addto_communicate(); } diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 71872157..4abc4585 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -250,6 +250,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop #pragma omp parallel { int thread_num = omp_get_thread_num(); + if(!thread_num) { timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); } #ifdef TENMOMENTS TenMoments& speciesMoments = fetch_momentsArray(thread_num); speciesMoments.set_to_zero(); @@ -570,6 +571,11 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop } #endif // TENMOMENTS } + if(!thread_num) timeTasks_end_task(TimeTasks::MOMENT_ACCUMULATION); + + // reduction + if(!thread_num) timeTasks_begin_task(TimeTasks::MOMENT_REDUCTION); + // split up the reduction tasks. 
// //{ @@ -646,6 +652,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTop for(int i=0;i Particles interpolation */ void EMfields3D::communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, VirtualTopology3D * vct) { // interpolate adding common nodes among processors - timeTasks.start_communicate(); + timeTasks_set_communicating(); communicateInterp(nxn, nyn, nzn, ns, rhons.fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); communicateInterp(nxn, nyn, nzn, ns, Jxs .fetch_arr4(), 0, 0, 0, 0, 0, 0, vct); @@ -1585,7 +1592,6 @@ void EMfields3D::communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, // calculate the correct densities on the boundaries adjustNonPeriodicDensities(ns, vct); // put the correct values on ghost cells - timeTasks.addto_communicate(); communicateNode_P(nxn, nyn, nzn, rhons, ns, vct); communicateNode_P(nxn, nyn, nzn, Jxs , ns, vct); diff --git a/iPic3D.cpp b/iPic3D.cpp index ec9b59ba..4e768a71 100644 --- a/iPic3D.cpp +++ b/iPic3D.cpp @@ -3,6 +3,7 @@ #include #include "iPic3D.h" #include "debug.h" +#include "TimeTasks.h" using namespace iPic3D; @@ -19,8 +20,14 @@ int main(int argc, char **argv) { if (KCode.get_myrank() == 0) cout << " ======= Cycle " << i << " ======= " << endl; if (!b_err) { + timeTasks.resetCycle(); + KCode.CalculateMoments(); KCode.CalculateField(); b_err = KCode.ParticlesMover(); + KCode.CalculateB(); + + // print out total time for all tasks + timeTasks.print_cycle_times(i); } if (b_err) { diff --git a/include/ComNodes3D.h b/include/ComNodes3D.h index c7e86731..edd6aba1 100644 --- a/include/ComNodes3D.h +++ b/include/ComNodes3D.h @@ -12,9 +12,6 @@ developers : Stefano Markidis, Giovanni Lapenta #include "arraysfwd.h" #include "ComBasic3D.h" -//#include "TimeTasks.h" - -//extern TimeTasks timeTasks; // boundary condition for fields #include "BcFields3D.h" diff --git a/include/TimeTasks.h b/include/TimeTasks.h index 8427bee5..84b9d230 100644 --- 
a/include/TimeTasks.h +++ b/include/TimeTasks.h @@ -1,45 +1,90 @@ #ifndef __TimeTasks_H__ #define __TimeTasks_H__ -class TimeTasks { +/* Avoid direct use of this class. + Instead, use and add to the macros at the bottom + so that we can redefine the macros when desired + (e.g. defining them to the empty string to + remove performance penalty). + */ + +class TimeTasks +{ + public: -public: // legitimate active subcycle values - enum Tasks { + // + // timeTasks_set_task(0) is a no-op, so + // MOMENT_REDUCTION=0 + // would prevent monitoring of this task. + // + enum Tasks // order must agree with taskNames in TimeTasks.cpp + { NONE = 0, MOMENTS, FIELDS, PARTICLES, + LAST, // no more exclusive tasks BFIELD, - LAST, - }; - enum Modes { - COMPUTATION = 0, - COMMUNICATION, + MOMENT_ACCUMULATION, + MOMENT_REDUCTION, + NUMBER_OF_TASKS // this line should be last }; -public: - void setActiveTask(int arg) { - active_task = arg; - } void setActiveMode(int in) { - t_start_communicate = in; - } - void resetCycle(); - void start(int taskid); - void end(int taskid); - void start_communicate(); - void addto_communicate(); - void print_cycle_times(); + private: + //enum Modes // for exclusive tasks + //{ + // COMPUTATION = 0, + // COMMUNICATION, + //}; + + public: // methods + TimeTasks() { resetCycle(); } + + // monitoring + // + void resetCycle(); + // + // provide start_time on ending call + // + void end_communicating(double start_time); + void start_main_task(TimeTasks::Tasks taskid); + void end_main_task(TimeTasks::Tasks taskid, double start_time); + void start_task(TimeTasks::Tasks taskid); + void end_task(TimeTasks::Tasks taskid, double start_time); + // + // provide start_time at starting call + // + void start_task(TimeTasks::Tasks taskid, double start_time); + void end_task(TimeTasks::Tasks taskid); + + // accessors + // + bool is_active(Tasks taskid){ return active[taskid]; } + bool get_communicating() { return communicating; } + void set_communicating(bool val) { 
communicating = val; } + int get_stack_depth(TimeTasks::Tasks taskid) { return stack_depth[taskid]; } + + // reporting + // + void print_cycle_times(int cycle); + + private: + + // is task exclusive? + bool is_exclusive(Tasks taskid) { return (taskid < LAST); } + + // reporting + // double get_time(int arg) { return task_duration[arg]; } double get_communicate(int arg) { return communicate[arg]; } - double get_communicate() { double total = 0.; for (int i = NONE + 1; i < LAST; i++) { @@ -47,7 +92,6 @@ class TimeTasks { } return total; } - double get_time() { double total = 0.; for (int i = NONE + 1; i < LAST; i++) { @@ -55,25 +99,101 @@ class TimeTasks { } return total; } - double get_compute(int arg) { return get_time(arg) - get_communicate(arg); } double get_compute() { return get_time() - get_communicate(); } + const char* get_taskname(int arg); -private: + private: int active_task; - int active_mode; - double t_start_communicate; - double start_times[LAST]; - double task_duration[LAST]; - double communicate[LAST]; - double compute[LAST]; - + bool active[NUMBER_OF_TASKS]; + bool communicating; + double task_duration[NUMBER_OF_TASKS]; + double communicate[NUMBER_OF_TASKS]; + double compute[NUMBER_OF_TASKS]; + int stack_depth[NUMBER_OF_TASKS]; + double start_times[NUMBER_OF_TASKS]; }; extern TimeTasks timeTasks; +// construct an anonymous instance of TimeTasksCaller +class TimeTasks_caller_to_set_main_task_for_scope +{ + double start_time; + TimeTasks::Tasks task; + public: + TimeTasks_caller_to_set_main_task_for_scope(TimeTasks::Tasks _task) : + task(_task) + { + start_time = MPI_Wtime(); + timeTasks.start_main_task(task); + } + ~TimeTasks_caller_to_set_main_task_for_scope() + { + timeTasks.end_main_task(task, start_time); + } +}; + +class TimeTasks_caller_to_set_task_for_scope +{ + bool already_active; + double start_time; + TimeTasks::Tasks task; + public: + TimeTasks_caller_to_set_task_for_scope(TimeTasks::Tasks _task) : + task(_task) + { + already_active = 
timeTasks.is_active(task); + if(!already_active) + { + start_time = MPI_Wtime(); + timeTasks.start_task(task); + } + } + ~TimeTasks_caller_to_set_task_for_scope() + { + if(already_active) + { + assert(timeTasks.is_active(task)); + } + else + { + timeTasks.end_task(task, start_time); + } + } +}; + +class TimeTasks_caller_to_set_communication_mode_for_scope +{ + private: + bool already_communicating; + double start_time; + public: + TimeTasks_caller_to_set_communication_mode_for_scope(); + ~TimeTasks_caller_to_set_communication_mode_for_scope(); +}; + +// These macros could be changed to provide file and line number +// +// We need to create nonanonymous instances so that the destructor +// will not be called until the end of the scope, so we use the preprocessor. +// to generate unique names of nonanonymous instances. +// +#define timeTasks_set_main_task(task) \ + TimeTasks_caller_to_set_main_task_for_scope myFunnyInstance(task); +#define timeTasks_set_task(task) \ + TimeTasks_caller_to_set_task_for_scope myFunnyName##__func__##__LINE__(task); +#define timeTasks_set_communicating() \ + TimeTasks_caller_to_set_communication_mode_for_scope myFunnyCommunicationInstance; +// +// The scoping trick does not work if the timeTasks call needs to be conditional, +// so we also provide the ability to explicitly begin and end. +#define timeTasks_begin_task(task) if(task) timeTasks.start_task(task, MPI_Wtime()); +#define timeTasks_end_task(task) if(task) timeTasks.end_task(task); +// + #endif diff --git a/include/iPic3D.h b/include/iPic3D.h index 9a79c131..93db7d11 100644 --- a/include/iPic3D.h +++ b/include/iPic3D.h @@ -34,8 +34,10 @@ namespace iPic3D { public: int Init(int argc, char **argv); - void CalculateField(); + void CalculateMoments(); + void CalculateField(); //! 
calculate Efield bool ParticlesMover(); + void CalculateB(); void WriteOutput(int cycle); void WriteConserved(int cycle); void WriteRestart(int cycle); diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 96216d78..d88692ed 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -171,11 +171,9 @@ int c_Solver::Init(int argc, char **argv) { return 0; } -void c_Solver::CalculateField() { +void c_Solver::CalculateMoments() { - timeTasks.resetCycle(); - // interpolation - timeTasks.start(TimeTasks::MOMENTS); + timeTasks_set_main_task(TimeTasks::MOMENTS); EMf->updateInfoFields(grid,vct,col); EMf->setZeroDensities(); // set to zero the densities @@ -201,32 +199,39 @@ void c_Solver::CalculateField() { EMf->interpDensitiesN2C(vct, grid); // calculate densities on centers from nodes EMf->calculateHatFunctions(grid, vct); // calculate the hat quantities for the implicit method MPI_Barrier(MPI_COMM_WORLD); - timeTasks.end(TimeTasks::MOMENTS); +} - // MAXWELL'S SOLVER - timeTasks.start(TimeTasks::FIELDS); +//! MAXWELL SOLVER for Efield +void c_Solver::CalculateField() { + timeTasks_set_main_task(TimeTasks::FIELDS); EMf->calculateE(grid, vct, col); // calculate the E field - timeTasks.end(TimeTasks::FIELDS); +} +//! MAXWELL SOLVER for Bfield (assuming Efield has already been calculated) +void c_Solver::CalculateB() { + timeTasks_set_main_task(TimeTasks::FIELDS); + timeTasks_set_task(TimeTasks::BFIELD); // subtask + EMf->calculateB(grid, vct, col); // calculate the B field } +/* -------------- */ +/*! 
Particle mover */ +/* -------------- */ bool c_Solver::ParticlesMover() { - /* -------------- */ - /* Particle mover */ - /* -------------- */ - - timeTasks.start(TimeTasks::PARTICLES); - // Should change this to add background guide field - EMf->set_fieldForPcls(); - for (int i = 0; i < ns; i++) // move each species + // move all species of particles { - // #pragma omp task inout(part[i]) in(grid) target_device(booster) - // - // should merely pass EMf->get_fieldForPcls() rather than EMf. - mem_avail = part[i].mover_PC(grid, vct, EMf); // use the Predictor Corrector scheme + timeTasks_set_main_task(TimeTasks::PARTICLES); + // Should change this to add background field + EMf->set_fieldForPcls(); + for (int i = 0; i < ns; i++) // move each species + { + // #pragma omp task inout(part[i]) in(grid) target_device(booster) + // + // should merely pass EMf->get_fieldForPcls() rather than EMf. + mem_avail = part[i].mover_PC(grid, vct, EMf); // use the Predictor Corrector scheme + } } - timeTasks.end(TimeTasks::PARTICLES); if (mem_avail < 0) { // not enough memory space allocated for particles: stop the simulation if (myrank == 0) { @@ -263,20 +268,7 @@ bool c_Solver::ParticlesMover() { for (int i=0; i < ns; i++) Qremoved[i] = part[i].deleteParticlesInsideSphere(col->getL_square(),col->getx_center(),col->gety_center(),col->getz_center()); } - - /* --------------------- */ - /* Calculate the B field */ - /* This step must be taken out of here! 
*/ - /* --------------------- */ - - timeTasks.start(TimeTasks::BFIELD); - EMf->calculateB(grid, vct, col); // calculate the B field - timeTasks.end(TimeTasks::BFIELD); - - // print out total time for all tasks - timeTasks.print_cycle_times(); return (false); - } void c_Solver::WriteRestart(int cycle) { diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index ecb1dab7..50550ee6 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -647,7 +647,7 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { // ********************// // COMMUNICATION // *******************// - timeTasks.start_communicate(); + timeTasks_set_communicating(); // communicating until end of scope const int avail = communicate(vct); if (avail < 0) return (-1); @@ -660,7 +660,6 @@ int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { return (-1); MPI_Barrier(MPI_COMM_WORLD); } - timeTasks.addto_communicate(); return (0); // exit succcesfully (hopefully) } diff --git a/utility/TimeTasks.cpp b/utility/TimeTasks.cpp index b48bb070..15a2c578 100644 --- a/utility/TimeTasks.cpp +++ b/utility/TimeTasks.cpp @@ -4,121 +4,167 @@ #include "TimeTasks.h" #include "asserts.h" #include "MPIdata.h" // for get_rank +#include "debug.h" /** implementation of declarations in utility/TimeTasks.h **/ TimeTasks timeTasks; +static const char *taskNames[] = // order must agree with Tasks in TimeTasks.h +{ + "none", + "moments", + "fields", + "particles", + "last", + "bfield", + "moment_accumulation", + "moment_reduction", + "number_of_tasks" +}; + +const char* TimeTasks::get_taskname(int arg) +{ + assert_le(arg,NUMBER_OF_TASKS); + return taskNames[arg]; +} + void TimeTasks::resetCycle() { - for(int e=0;e Date: Thu, 10 Oct 2013 12:36:01 +0200 Subject: [PATCH 057/118] corrected compile error from previous commit --- include/TimeTasks.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/TimeTasks.h b/include/TimeTasks.h index 
84b9d230..8387d042 100644 --- a/include/TimeTasks.h +++ b/include/TimeTasks.h @@ -1,5 +1,6 @@ #ifndef __TimeTasks_H__ #define __TimeTasks_H__ +#include "assert.h" /* Avoid direct use of this class. Instead, use and add to the macros at the bottom From e86bc65d6e444a28258ad1db32fb07812a8fb02d Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 14 Oct 2013 12:23:51 +0200 Subject: [PATCH 058/118] sumMoments now sums moments of all species --- fields/EMfields3D.cpp | 189 +++++++++++++++++++++++++++++++++++++++--- fields/Moments.cpp | 20 ----- include/EMfields3D.h | 16 +--- include/Moments.h | 72 +--------------- main/iPic3Dlib.cpp | 12 +-- 5 files changed, 185 insertions(+), 124 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 4abc4585..f081bb54 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -193,21 +193,15 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : injFieldsRear = new injInfoFields(nxn, nyn, nzn); sizeMomentsArray = omp_thread_count(); - #ifdef TENMOMENTS - tenMomentsArray = new TenMoments*[sizeMomentsArray]; - #endif // TENMOMENTS moments10Array = new Moments10*[sizeMomentsArray]; for(int i=0;igetNXN(); + const int nyn = grid->getNYN(); + const int nzn = grid->getNZN(); + const double xstart = grid->getXstart(); + const double ystart = grid->getYstart(); + const double zstart = grid->getZstart(); + // To make memory use scale to a large number of threads, we + // could first apply an efficient parallel sorting algorithm + // to the particles and then accumulate moments in smaller + // subarrays. 
+ //#ifdef _OPENMP + #pragma omp parallel + for (int i = 0; i < ns; i++) + { + const Particles3Dcomm& pcls = part[i]; + const int is = pcls.get_ns(); + + double const*const x = pcls.getXall(); + double const*const y = pcls.getYall(); + double const*const z = pcls.getZall(); + double const*const u = pcls.getUall(); + double const*const v = pcls.getVall(); + double const*const w = pcls.getWall(); + double const*const q = pcls.getQall(); + + const int nop = pcls.getNOP(); + + int thread_num = omp_get_thread_num(); + if(!thread_num) { timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); } + Moments10& speciesMoments10 = fetch_moments10Array(thread_num); + speciesMoments10.set_to_zero(); + arr4_double moments = speciesMoments10.fetch_arr(); + // The following loop is expensive, so it is wise to assume that the + // compiler is stupid. Therefore we should on the one hand + // expand things out and on the other hand avoid repeating computations. + #pragma omp for nowait + for (int i = 0; i < nop; i++) + { + // compute the quadratic moments of velocity + // + const double ui=u[i]; + const double vi=v[i]; + const double wi=w[i]; + const double uui=ui*ui; + const double uvi=ui*vi; + const double uwi=ui*wi; + const double vvi=vi*vi; + const double vwi=vi*wi; + const double wwi=wi*wi; + double velmoments[10]; + velmoments[0] = 1.; + velmoments[1] = ui; + velmoments[2] = vi; + velmoments[3] = wi; + velmoments[4] = uui; + velmoments[5] = uvi; + velmoments[6] = uwi; + velmoments[7] = vvi; + velmoments[8] = vwi; + velmoments[9] = wwi; + + // + // compute the weights to distribute the moments + // + const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); + const int iy = 2 + int (floor((y[i] - ystart) * inv_dy)); + const int iz = 2 + int (floor((z[i] - zstart) * inv_dz)); + const double xi0 = x[i] - grid->getXN(ix-1); + const double eta0 = y[i] - grid->getYN(iy-1); + const double zeta0 = z[i] - grid->getZN(iz-1); + const double xi1 = grid->getXN(ix) - x[i]; + const double eta1 = 
grid->getYN(iy) - y[i]; + const double zeta1 = grid->getZN(iz) - z[i]; + const double qi = q[i]; + const double weight000 = qi * xi0 * eta0 * zeta0 * invVOL; + const double weight001 = qi * xi0 * eta0 * zeta1 * invVOL; + const double weight010 = qi * xi0 * eta1 * zeta0 * invVOL; + const double weight011 = qi * xi0 * eta1 * zeta1 * invVOL; + const double weight100 = qi * xi1 * eta0 * zeta0 * invVOL; + const double weight101 = qi * xi1 * eta0 * zeta1 * invVOL; + const double weight110 = qi * xi1 * eta1 * zeta0 * invVOL; + const double weight111 = qi * xi1 * eta1 * zeta1 * invVOL; + double weights[8]; + weights[0] = weight000; + weights[1] = weight001; + weights[2] = weight010; + weights[3] = weight011; + weights[4] = weight100; + weights[5] = weight101; + weights[6] = weight110; + weights[0] = weight111; + + // add particle to moments + { + arr1_double_fetch moments000 = moments[ix ][iy ][iz ]; + arr1_double_fetch moments001 = moments[ix ][iy ][iz-1]; + arr1_double_fetch moments010 = moments[ix ][iy-1][iz ]; + arr1_double_fetch moments011 = moments[ix ][iy-1][iz-1]; + arr1_double_fetch moments100 = moments[ix-1][iy ][iz ]; + arr1_double_fetch moments101 = moments[ix-1][iy ][iz-1]; + arr1_double_fetch moments110 = moments[ix-1][iy-1][iz ]; + arr1_double_fetch moments111 = moments[ix-1][iy-1][iz-1]; + + arr1_double_fetch momentsArray[8]; + momentsArray[0] = moments000; + momentsArray[1] = moments001; + momentsArray[2] = moments010; + momentsArray[3] = moments011; + momentsArray[4] = moments100; + momentsArray[5] = moments101; + momentsArray[6] = moments110; + momentsArray[7] = moments111; + + double buffer[10][8]; + // #pragma simd + for(int m=0;m<10;m++) + for(int c=0;c<8;c++) + { + buffer[m][c] = velmoments[c]*weights[m]; + } + for(int c=0;c<8;c++) + for(int m=0;m<10;m++) + { + momentsArray[c][m] = buffer[m][c]; + } + } + } + if(!thread_num) timeTasks_end_task(TimeTasks::MOMENT_ACCUMULATION); + + // reduction + if(!thread_num) 
timeTasks_begin_task(TimeTasks::MOMENT_REDUCTION); + + // reduce arrays + { + #pragma omp critical (0) + for(int i=0;i Particles interpolation */ void communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, VirtualTopology3D * vct); - void sumMoments(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); + void sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); + void sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); /*! add accumulated moments to the moments for a given species */ //void addToSpeciesMoments(const TenMoments & in, int is); /*! add an amount of charge density to charge density field at node X,Y,Z */ @@ -262,13 +260,6 @@ class EMfields3D // :public Field double getBenergy(); /*! fetch array for summing moments of thread i */ - #ifdef TENMOMENTS - TenMoments& fetch_momentsArray(int i){ - assert_le(0,i); - assert_le(i,sizeMomentsArray); - return *(tenMomentsArray[i]); - } - #endif // TENMOMENTS Moments10& fetch_moments10Array(int i){ assert_le(0,i); assert_le(i,sizeMomentsArray); @@ -402,9 +393,6 @@ class EMfields3D // :public Field array3_double divC; /* temporary arrays for summing moments */ int sizeMomentsArray; - #ifdef TENMOMENTS - TenMoments **tenMomentsArray; - #endif // TENMOMENTS Moments10 **moments10Array; // ******************************************************************************* diff --git a/include/Moments.h b/include/Moments.h index 8a4a10cf..9c6aadb8 100644 --- a/include/Moments.h +++ b/include/Moments.h @@ -2,6 +2,8 @@ #define Moments_H #include "Alloc.h" +// class to accumulate node-centered species moments +// class Moments10 { private: @@ -25,74 +27,4 @@ class Moments10 ~Moments10(){}; }; -// class to accumulate node-centered species moments -// -#ifdef TENMOMENTS -class TenMoments { - private: - arr3_double rho; - - /** current density, defined on nodes */ - arr3_double Jx; - arr3_double Jy; - arr3_double Jz; - 
- /** pressure tensor components, defined on nodes */ - arr3_double pXX; - arr3_double pXY; - arr3_double pXZ; - arr3_double pYY; - arr3_double pYZ; - arr3_double pZZ; - int nx; - int ny; - int nz; - public: - // get accessors (read access) - int get_nx() const { return nx; } - int get_ny() const { return ny; } - int get_nz() const { return nz; } - double get_rho(int i, int j, int k) const { return rho.get(i,j,k); } - double get_Jx (int i, int j, int k) const { return Jx .get(i,j,k); } - double get_Jy (int i, int j, int k) const { return Jy .get(i,j,k); } - double get_Jz (int i, int j, int k) const { return Jz .get(i,j,k); } - double get_pXX(int i, int j, int k) const { return pXX.get(i,j,k); } - double get_pXY(int i, int j, int k) const { return pXY.get(i,j,k); } - double get_pXZ(int i, int j, int k) const { return pXZ.get(i,j,k); } - double get_pYY(int i, int j, int k) const { return pYY.get(i,j,k); } - double get_pYZ(int i, int j, int k) const { return pYZ.get(i,j,k); } - double get_pZZ(int i, int j, int k) const { return pZZ.get(i,j,k); } - // fetch accessors (write access) - arr3_double fetch_rho() { return rho; } - arr3_double fetch_Jx () { return Jx ; } - arr3_double fetch_Jy () { return Jy ; } - arr3_double fetch_Jz () { return Jz ; } - arr3_double fetch_Pxx() { return pXX; } - arr3_double fetch_Pxy() { return pXY; } - arr3_double fetch_Pxz() { return pXZ; } - arr3_double fetch_Pyy() { return pYY; } - arr3_double fetch_Pyz() { return pYZ; } - arr3_double fetch_Pzz() { return pZZ; } - public: - TenMoments(int nxn, int nyn, int nzn) : - nx(nxn), - ny(nyn), - nz(nzn), - rho (nxn, nyn, nzn), - Jx (nxn, nyn, nzn), - Jy (nxn, nyn, nzn), - Jz (nxn, nyn, nzn), - pXX (nxn, nyn, nzn), - pXY (nxn, nyn, nzn), - pXZ (nxn, nyn, nzn), - pYY (nxn, nyn, nzn), - pYZ (nxn, nyn, nzn), - pZZ (nxn, nyn, nzn) - { - }; - ~TenMoments(){}; - void set_to_zero(); -}; -#endif // TENMOMENTS - #endif diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index d88692ed..0ee817c9 100644 --- 
a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -177,15 +177,9 @@ void c_Solver::CalculateMoments() { EMf->updateInfoFields(grid,vct,col); EMf->setZeroDensities(); // set to zero the densities - - for (int i = 0; i < ns; i++) - { - // interpolate particles to grid nodes - EMf->sumMoments(part[i], grid, vct); - //part[i].interpP2G(EMf, grid, vct); // the old, slow way. - } - - EMf->sumOverSpecies(vct); // sum all over the species + EMf->sumMoments(part, grid, vct); + //EMf->sumMomentsOld(part, grid, vct); + //EMf->sumOverSpecies(vct); // sum all over the species // Fill with constant charge the planet if (col->getCase()=="Dipole") { From aa96d727d90ddc3513002f6b1bafa4c897162889 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 14 Oct 2013 15:12:50 +0200 Subject: [PATCH 059/118] fix to errors in previous commit --- fields/EMfields3D.cpp | 258 +++++------------------------------------- main/iPic3Dlib.cpp | 10 +- 2 files changed, 38 insertions(+), 230 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index f081bb54..a652cda0 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -222,19 +222,6 @@ void EMfields3D::sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, Virtual // const int is = pcls.get_ns(); - #ifdef TENMOMENTS - double* rhons1d = &rhons[is][0][0][0]; - double* Jxs1d = &Jxs [is][0][0][0]; - double* Jys1d = &Jys [is][0][0][0]; - double* Jzs1d = &Jzs [is][0][0][0]; - double* pXXsn1d = &pXXsn[is][0][0][0]; - double* pXYsn1d = &pXYsn[is][0][0][0]; - double* pXZsn1d = &pXZsn[is][0][0][0]; - double* pYYsn1d = &pYYsn[is][0][0][0]; - double* pYZsn1d = &pYZsn[is][0][0][0]; - double* pZZsn1d = &pZZsn[is][0][0][0]; - #endif - // const int nop = pcls.getNOP(); // To make memory use scale to a large number of threads, we // could first apply an efficient parallel sorting algorithm @@ -245,20 +232,6 @@ void EMfields3D::sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, Virtual { int thread_num = omp_get_thread_num(); 
if(!thread_num) { timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); } - #ifdef TENMOMENTS - TenMoments& speciesMoments = fetch_momentsArray(thread_num); - speciesMoments.set_to_zero(); - arr3_double rho = speciesMoments.fetch_rho(); - arr3_double Jx = speciesMoments.fetch_Jx(); - arr3_double Jy = speciesMoments.fetch_Jy(); - arr3_double Jz = speciesMoments.fetch_Jz(); - arr3_double Pxx = speciesMoments.fetch_Pxx(); - arr3_double Pxy = speciesMoments.fetch_Pxy(); - arr3_double Pxz = speciesMoments.fetch_Pxz(); - arr3_double Pyy = speciesMoments.fetch_Pyy(); - arr3_double Pyz = speciesMoments.fetch_Pyz(); - arr3_double Pzz = speciesMoments.fetch_Pzz(); - #endif // TENMOMENTS Moments10& speciesMoments10 = fetch_moments10Array(thread_num); speciesMoments10.set_to_zero(); arr4_double moments = speciesMoments10.fetch_arr(); @@ -297,12 +270,6 @@ void EMfields3D::sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, Virtual const int ix = 2 + int (floor((x[i] - xstart) * inv_dx)); const int iy = 2 + int (floor((y[i] - ystart) * inv_dy)); const int iz = 2 + int (floor((z[i] - zstart) * inv_dz)); - //const double xi0 = x[i] - grid->getXN(ix - 1, iy, iz); - //const double eta0 = y[i] - grid->getYN(ix, iy - 1, iz); - //const double zeta0 = z[i] - grid->getZN(ix, iy, iz - 1); - //const double xi1 = grid->getXN(ix, iy, iz) - x[i]; - //const double eta1 = grid->getYN(ix, iy, iz) - y[i]; - //const double zeta1 = grid->getZN(ix, iy, iz) - z[i]; const double xi0 = x[i] - grid->getXN(ix-1); const double eta0 = y[i] - grid->getYN(iy-1); const double zeta0 = z[i] - grid->getZN(iz-1); @@ -437,212 +404,42 @@ void EMfields3D::sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, Virtual //} } - #ifdef TENMOMENTS - { - // use the weight to distribute the moments - // - // add charge density - //speciesMoments.addRho(weight, ix, iy, iz); - rho[ix ][iy ][iz ] += weight000; - rho[ix ][iy ][iz-1] += weight001; - rho[ix ][iy-1][iz ] += weight010; - rho[ix ][iy-1][iz-1] += weight011; - 
rho[ix-1][iy ][iz ] += weight100; - rho[ix-1][iy ][iz-1] += weight101; - rho[ix-1][iy-1][iz ] += weight110; - rho[ix-1][iy-1][iz-1] += weight111; - // add current density - X - //speciesMoments.addJx(temp, ix, iy, iz); - Jx[ix ][iy ][iz ] += ui*weight000; - Jx[ix ][iy ][iz-1] += ui*weight001; - Jx[ix ][iy-1][iz ] += ui*weight010; - Jx[ix ][iy-1][iz-1] += ui*weight011; - Jx[ix-1][iy ][iz ] += ui*weight100; - Jx[ix-1][iy ][iz-1] += ui*weight101; - Jx[ix-1][iy-1][iz ] += ui*weight110; - Jx[ix-1][iy-1][iz-1] += ui*weight111; - // add current density - Y - //speciesMoments.addJy(temp, ix, iy, iz); - Jy[ix ][iy ][iz ] += vi*weight000; - Jy[ix ][iy ][iz-1] += vi*weight001; - Jy[ix ][iy-1][iz ] += vi*weight010; - Jy[ix ][iy-1][iz-1] += vi*weight011; - Jy[ix-1][iy ][iz ] += vi*weight100; - Jy[ix-1][iy ][iz-1] += vi*weight101; - Jy[ix-1][iy-1][iz ] += vi*weight110; - Jy[ix-1][iy-1][iz-1] += vi*weight111; - // add current density - Z - //speciesMoments.addJz(temp, ix, iy, iz); - Jz[ix ][iy ][iz ] += wi*weight000; - Jz[ix ][iy ][iz-1] += wi*weight001; - Jz[ix ][iy-1][iz ] += wi*weight010; - Jz[ix ][iy-1][iz-1] += wi*weight011; - Jz[ix-1][iy ][iz ] += wi*weight100; - Jz[ix-1][iy ][iz-1] += wi*weight101; - Jz[ix-1][iy-1][iz ] += wi*weight110; - Jz[ix-1][iy-1][iz-1] += wi*weight111; - // Pxx - add pressure tensor - //speciesMoments.addPxx(temp, ix, iy, iz); - Pxx[ix ][iy ][iz ] += uui*weight000; - Pxx[ix ][iy ][iz-1] += uui*weight001; - Pxx[ix ][iy-1][iz ] += uui*weight010; - Pxx[ix ][iy-1][iz-1] += uui*weight011; - Pxx[ix-1][iy ][iz ] += uui*weight100; - Pxx[ix-1][iy ][iz-1] += uui*weight101; - Pxx[ix-1][iy-1][iz ] += uui*weight110; - Pxx[ix-1][iy-1][iz-1] += uui*weight111; - // Pxy - add pressure tensor - //speciesMoments.addPxy(temp, ix, iy, iz); - Pxy[ix ][iy ][iz ] += uvi*weight000; - Pxy[ix ][iy ][iz-1] += uvi*weight001; - Pxy[ix ][iy-1][iz ] += uvi*weight010; - Pxy[ix ][iy-1][iz-1] += uvi*weight011; - Pxy[ix-1][iy ][iz ] += uvi*weight100; - Pxy[ix-1][iy ][iz-1] += 
uvi*weight101; - Pxy[ix-1][iy-1][iz ] += uvi*weight110; - Pxy[ix-1][iy-1][iz-1] += uvi*weight111; - // Pxz - add pressure tensor - //speciesMoments.addPxz(temp, ix, iy, iz); - Pxz[ix ][iy ][iz ] += uwi*weight000; - Pxz[ix ][iy ][iz-1] += uwi*weight001; - Pxz[ix ][iy-1][iz ] += uwi*weight010; - Pxz[ix ][iy-1][iz-1] += uwi*weight011; - Pxz[ix-1][iy ][iz ] += uwi*weight100; - Pxz[ix-1][iy ][iz-1] += uwi*weight101; - Pxz[ix-1][iy-1][iz ] += uwi*weight110; - Pxz[ix-1][iy-1][iz-1] += uwi*weight111; - // Pyy - add pressure tensor - //speciesMoments.addPyy(temp, ix, iy, iz); - Pyy[ix ][iy ][iz ] += vvi*weight000; - Pyy[ix ][iy ][iz-1] += vvi*weight001; - Pyy[ix ][iy-1][iz ] += vvi*weight010; - Pyy[ix ][iy-1][iz-1] += vvi*weight011; - Pyy[ix-1][iy ][iz ] += vvi*weight100; - Pyy[ix-1][iy ][iz-1] += vvi*weight101; - Pyy[ix-1][iy-1][iz ] += vvi*weight110; - Pyy[ix-1][iy-1][iz-1] += vvi*weight111; - // Pyz - add pressure tensor - //speciesMoments.addPyz(temp, ix, iy, iz); - Pyz[ix ][iy ][iz ] += vwi*weight000; - Pyz[ix ][iy ][iz-1] += vwi*weight001; - Pyz[ix ][iy-1][iz ] += vwi*weight010; - Pyz[ix ][iy-1][iz-1] += vwi*weight011; - Pyz[ix-1][iy ][iz ] += vwi*weight100; - Pyz[ix-1][iy ][iz-1] += vwi*weight101; - Pyz[ix-1][iy-1][iz ] += vwi*weight110; - Pyz[ix-1][iy-1][iz-1] += vwi*weight111; - // Pzz - add pressure tensor - //speciesMoments.addPzz(temp, ix, iy, iz); - Pzz[ix ][iy ][iz ] += wwi*weight000; - Pzz[ix ][iy ][iz-1] += wwi*weight001; - Pzz[ix ][iy-1][iz ] += wwi*weight010; - Pzz[ix ][iy-1][iz-1] += wwi*weight011; - Pzz[ix-1][iy ][iz ] += wwi*weight100; - Pzz[ix-1][iy ][iz-1] += wwi*weight101; - Pzz[ix-1][iy-1][iz ] += wwi*weight110; - Pzz[ix-1][iy-1][iz-1] += wwi*weight111; - } - #endif // TENMOMENTS - - #ifdef TENMOMENTS - { - // check work - for(int jx=0;jx<2;jx++) - for(int jy=0;jy<2;jy++) - for(int jz=0;jz<2;jz++) - { - assert_eq(rho[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][0]); - assert_eq(Jx [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][1]); - 
assert_eq(Jy [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][2]); - assert_eq(Jz [ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][3]); - assert_eq(Pxx[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][4]); - assert_eq(Pxy[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][5]); - assert_eq(Pxz[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][6]); - assert_eq(Pyy[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][7]); - assert_eq(Pyz[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][8]); - assert_eq(Pzz[ix-jx][iy-jy][iz-jz], moments[ix-jx][iy-jy][iz-jz][9]); - } - } - #endif // TENMOMENTS } if(!thread_num) timeTasks_end_task(TimeTasks::MOMENT_ACCUMULATION); // reduction if(!thread_num) timeTasks_begin_task(TimeTasks::MOMENT_REDUCTION); - // split up the reduction tasks. - // - //{ - // // - // // One-dimensional array access is presumably - // // more efficient on poor compilers. - // double* rho1d = &rho[0][0][0]; - // double* Jx1d = &Jx [0][0][0]; - // double* Jy1d = &Jy [0][0][0]; - // double* Jz1d = &Jz [0][0][0]; - // double* Pxx1d = &Pxx[0][0][0]; - // double* Pxy1d = &Pxy[0][0][0]; - // double* Pxz1d = &Pxz[0][0][0]; - // double* Pyy1d = &Pyy[0][0][0]; - // double* Pyz1d = &Pyz[0][0][0]; - // double* Pzz1d = &Pzz[0][0][0]; - // //// - // assert_eq(speciesMoments.get_nx(), nxn); - // assert_eq(speciesMoments.get_ny(), nyn); - // assert_eq(speciesMoments.get_nz(), nzn); - // const int numel = nxn*nyn*nzn; - // #pragma omp critical - // for(int i=0;iupdateInfoFields(grid,vct,col); EMf->setZeroDensities(); // set to zero the densities - EMf->sumMoments(part, grid, vct); - //EMf->sumMomentsOld(part, grid, vct); - //EMf->sumOverSpecies(vct); // sum all over the species + + //EMf->sumMoments(part, grid, vct); + for (int i = 0; i < ns; i++) + { + EMf->sumMomentsOld(part[i], grid, vct); + } + EMf->sumOverSpecies(vct); // sum all over the species // Fill with constant charge the planet if (col->getCase()=="Dipole") { From 
c950a84e53b1e90ad92ead0f2c203aeee00546a1 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 14 Oct 2013 15:22:32 +0200 Subject: [PATCH 060/118] issue #55: Sum moments for all species in one OpenMP parallel clause (initial commit was e86bc65d6e4) --- fields/EMfields3D.cpp | 185 +++++++++--------------------------------- include/arraysfwd.h | 2 + main/iPic3Dlib.cpp | 10 +-- 3 files changed, 47 insertions(+), 150 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index a652cda0..b130ef2d 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -285,125 +285,34 @@ void EMfields3D::sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, Virtual const double weight101 = qi * xi1 * eta0 * zeta1 * invVOL; const double weight110 = qi * xi1 * eta1 * zeta0 * invVOL; const double weight111 = qi * xi1 * eta1 * zeta1 * invVOL; + double weights[8]; + weights[0] = weight000; + weights[1] = weight001; + weights[2] = weight010; + weights[3] = weight011; + weights[4] = weight100; + weights[5] = weight101; + weights[6] = weight110; + weights[7] = weight111; // add particle to moments { - arr1_double_fetch moments000 = moments[ix ][iy ][iz ]; - arr1_double_fetch moments001 = moments[ix ][iy ][iz-1]; - arr1_double_fetch moments010 = moments[ix ][iy-1][iz ]; - arr1_double_fetch moments011 = moments[ix ][iy-1][iz-1]; - arr1_double_fetch moments100 = moments[ix-1][iy ][iz ]; - arr1_double_fetch moments101 = moments[ix-1][iy ][iz-1]; - arr1_double_fetch moments110 = moments[ix-1][iy-1][iz ]; - arr1_double_fetch moments111 = moments[ix-1][iy-1][iz-1]; - - moments000[0] += velmoments[0]*weight000; - moments000[1] += velmoments[1]*weight000; - moments000[2] += velmoments[2]*weight000; - moments000[3] += velmoments[3]*weight000; - moments000[4] += velmoments[4]*weight000; - moments000[5] += velmoments[5]*weight000; - moments000[6] += velmoments[6]*weight000; - moments000[7] += velmoments[7]*weight000; - moments000[8] += velmoments[8]*weight000; - moments000[9] += 
velmoments[9]*weight000; - - moments001[0] += velmoments[0]*weight001; - moments001[1] += velmoments[1]*weight001; - moments001[2] += velmoments[2]*weight001; - moments001[3] += velmoments[3]*weight001; - moments001[4] += velmoments[4]*weight001; - moments001[5] += velmoments[5]*weight001; - moments001[6] += velmoments[6]*weight001; - moments001[7] += velmoments[7]*weight001; - moments001[8] += velmoments[8]*weight001; - moments001[9] += velmoments[9]*weight001; - - moments010[0] += velmoments[0]*weight010; - moments010[1] += velmoments[1]*weight010; - moments010[2] += velmoments[2]*weight010; - moments010[3] += velmoments[3]*weight010; - moments010[4] += velmoments[4]*weight010; - moments010[5] += velmoments[5]*weight010; - moments010[6] += velmoments[6]*weight010; - moments010[7] += velmoments[7]*weight010; - moments010[8] += velmoments[8]*weight010; - moments010[9] += velmoments[9]*weight010; - - moments011[0] += velmoments[0]*weight011; - moments011[1] += velmoments[1]*weight011; - moments011[2] += velmoments[2]*weight011; - moments011[3] += velmoments[3]*weight011; - moments011[4] += velmoments[4]*weight011; - moments011[5] += velmoments[5]*weight011; - moments011[6] += velmoments[6]*weight011; - moments011[7] += velmoments[7]*weight011; - moments011[8] += velmoments[8]*weight011; - moments011[9] += velmoments[9]*weight011; - - moments100[0] += velmoments[0]*weight100; - moments100[1] += velmoments[1]*weight100; - moments100[2] += velmoments[2]*weight100; - moments100[3] += velmoments[3]*weight100; - moments100[4] += velmoments[4]*weight100; - moments100[5] += velmoments[5]*weight100; - moments100[6] += velmoments[6]*weight100; - moments100[7] += velmoments[7]*weight100; - moments100[8] += velmoments[8]*weight100; - moments100[9] += velmoments[9]*weight100; - - moments101[0] += velmoments[0]*weight101; - moments101[1] += velmoments[1]*weight101; - moments101[2] += velmoments[2]*weight101; - moments101[3] += velmoments[3]*weight101; - moments101[4] += 
velmoments[4]*weight101; - moments101[5] += velmoments[5]*weight101; - moments101[6] += velmoments[6]*weight101; - moments101[7] += velmoments[7]*weight101; - moments101[8] += velmoments[8]*weight101; - moments101[9] += velmoments[9]*weight101; - - moments110[0] += velmoments[0]*weight110; - moments110[1] += velmoments[1]*weight110; - moments110[2] += velmoments[2]*weight110; - moments110[3] += velmoments[3]*weight110; - moments110[4] += velmoments[4]*weight110; - moments110[5] += velmoments[5]*weight110; - moments110[6] += velmoments[6]*weight110; - moments110[7] += velmoments[7]*weight110; - moments110[8] += velmoments[8]*weight110; - moments110[9] += velmoments[9]*weight110; - - moments111[0] += velmoments[0]*weight111; - moments111[1] += velmoments[1]*weight111; - moments111[2] += velmoments[2]*weight111; - moments111[3] += velmoments[3]*weight111; - moments111[4] += velmoments[4]*weight111; - moments111[5] += velmoments[5]*weight111; - moments111[6] += velmoments[6]*weight111; - moments111[7] += velmoments[7]*weight111; - moments111[8] += velmoments[8]*weight111; - moments111[9] += velmoments[9]*weight111; - - //double weight[2][2][2]; - //weight[0][0][0]=weight000; - //weight[0][0][1]=weight001; - //weight[0][1][0]=weight010; - //weight[0][1][1]=weight011; - //weight[1][0][0]=weight100; - //weight[1][0][1]=weight101; - //weight[1][1][0]=weight110; - //weight[1][1][1]=weight111; - //// - //for(int jx=0;jx<2;jx++) - //for(int jy=0;jy<2;jy++) - //for(int jz=0;jz<2;jz++) - //for(int m=0;m<10;m++) - //{ - // moments[ix-jx][iy-jy][iz-jz][m] += velmoments[m]*weight[jx][jy][jz]; - //} + arr1_double_fetch momentsArray[8]; + momentsArray[0] = moments[ix ][iy ][iz ]; // moments000 + momentsArray[1] = moments[ix ][iy ][iz-1]; // moments001 + momentsArray[2] = moments[ix ][iy-1][iz ]; // moments010 + momentsArray[3] = moments[ix ][iy-1][iz-1]; // moments011 + momentsArray[4] = moments[ix-1][iy ][iz ]; // moments100 + momentsArray[5] = moments[ix-1][iy ][iz-1]; // 
moments101 + momentsArray[6] = moments[ix-1][iy-1][iz ]; // moments110 + momentsArray[7] = moments[ix-1][iy-1][iz-1]; // moments111 + + for(int m=0; m<10; m++) + for(int c=0; c<8; c++) + { + momentsArray[c][m] += velmoments[m]*weights[c]; + } } - } if(!thread_num) timeTasks_end_task(TimeTasks::MOMENT_ACCUMULATION); @@ -548,38 +457,24 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop // add particle to moments { - arr1_double_fetch moments000 = moments[ix ][iy ][iz ]; - arr1_double_fetch moments001 = moments[ix ][iy ][iz-1]; - arr1_double_fetch moments010 = moments[ix ][iy-1][iz ]; - arr1_double_fetch moments011 = moments[ix ][iy-1][iz-1]; - arr1_double_fetch moments100 = moments[ix-1][iy ][iz ]; - arr1_double_fetch moments101 = moments[ix-1][iy ][iz-1]; - arr1_double_fetch moments110 = moments[ix-1][iy-1][iz ]; - arr1_double_fetch moments111 = moments[ix-1][iy-1][iz-1]; - arr1_double_fetch momentsArray[8]; - momentsArray[0] = moments000; - momentsArray[1] = moments001; - momentsArray[2] = moments010; - momentsArray[3] = moments011; - momentsArray[4] = moments100; - momentsArray[5] = moments101; - momentsArray[6] = moments110; - momentsArray[7] = moments111; - - double buffer[10][8]; - //#pragma simd - for(int m=0;m<10;m++) - { - for(int c=0;c<8;c++) - { - buffer[m][c] = velmoments[m]*weights[c]; - } - } - for(int c=0;c<8;c++) - for(int m=0;m<10;m++) + arr2_double_fetch moments00 = moments[ix ][iy ]; + arr2_double_fetch moments01 = moments[ix ][iy-1]; + arr2_double_fetch moments10 = moments[ix-1][iy ]; + arr2_double_fetch moments11 = moments[ix-1][iy-1]; + momentsArray[0] = moments00[iz ]; // moments000 + momentsArray[1] = moments00[iz-1]; // moments001 + momentsArray[2] = moments01[iz ]; // moments010 + momentsArray[3] = moments01[iz-1]; // moments011 + momentsArray[4] = moments10[iz ]; // moments100 + momentsArray[5] = moments10[iz-1]; // moments101 + momentsArray[6] = moments11[iz ]; // moments110 + momentsArray[7] = 
moments11[iz-1]; // moments111 + + for(int m=0; m<10; m++) + for(int c=0; c<8; c++) { - momentsArray[c][m] = buffer[m][c]; + momentsArray[c][m] += velmoments[m]*weights[c]; } } } diff --git a/include/arraysfwd.h b/include/arraysfwd.h index 30bda425..889c950d 100644 --- a/include/arraysfwd.h +++ b/include/arraysfwd.h @@ -57,10 +57,12 @@ typedef iPic3D::array4 array4_pfloat; typedef iPic3D::array_fetch1 arr1_double_fetch; typedef iPic3D::array_get1 arr1_double_get; typedef iPic3D::array_get1 arr1_pfloat_get; +typedef iPic3D::array_fetch2 arr2_double_fetch; #else typedef double* arr1_double_fetch; typedef double* arr1_double_get; typedef pfloat* arr1_pfloat_get; +typedef double** arr2_double_fetch; #endif #endif diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 449c56c2..5faaf366 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -178,11 +178,11 @@ void c_Solver::CalculateMoments() { EMf->updateInfoFields(grid,vct,col); EMf->setZeroDensities(); // set to zero the densities - //EMf->sumMoments(part, grid, vct); - for (int i = 0; i < ns; i++) - { - EMf->sumMomentsOld(part[i], grid, vct); - } + EMf->sumMoments(part, grid, vct); + //for (int i = 0; i < ns; i++) + //{ + // EMf->sumMomentsOld(part[i], grid, vct); + //} EMf->sumOverSpecies(vct); // sum all over the species // Fill with constant charge the planet From 3837df216981f32893e506811290ac121c09e9a6 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 14 Oct 2013 16:51:28 +0200 Subject: [PATCH 061/118] create OpenMP threads only once to push particles --- include/Particles3D.h | 4 +++- main/iPic3Dlib.cpp | 7 ++++++- particles/Particles3D.cpp | 37 ++++++++----------------------------- 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/include/Particles3D.h b/include/Particles3D.h index fd89edd0..74cfbf37 100644 --- a/include/Particles3D.h +++ b/include/Particles3D.h @@ -57,7 +57,9 @@ class Particles3D:public Particles3Dcomm { /** mover with the esplicit non relativistic scheme */ void 
mover_explicit(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** mover with a Predictor-Corrector Scheme */ - int mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf); + void mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf); + /** communicate particle after moving them */ + int communicate_particles(VirtualTopology3D * vct); /** relativistic mover with a Predictor-Corrector scheme */ int mover_relativistic(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** particle repopulator */ diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 5faaf366..77bd3120 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -222,12 +222,17 @@ bool c_Solver::ParticlesMover() { timeTasks_set_main_task(TimeTasks::PARTICLES); // Should change this to add background field EMf->set_fieldForPcls(); + #pragma omp parallel for (int i = 0; i < ns; i++) // move each species { // #pragma omp task inout(part[i]) in(grid) target_device(booster) // // should merely pass EMf->get_fieldForPcls() rather than EMf. 
- mem_avail = part[i].mover_PC(grid, vct, EMf); // use the Predictor Corrector scheme + part[i].mover_PC(grid, vct, EMf); // use the Predictor Corrector scheme + } + for (int i = 0; i < ns; i++) // move each species + { + mem_avail = part[i].communicate_particles(vct); } } diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 50550ee6..7edbf634 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -311,41 +311,19 @@ void Particles3D::mover_explicit(Grid * grid, VirtualTopology3D * vct, Field * E } /** mover with a Predictor-Corrector scheme */ -int Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { +void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { + #pragma omp master if (vct->getCartesian_rank() == 0) { cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; } - double start_mover_PC = MPI_Wtime(); - #if 0 - const_arr3_double Ex = EMf->getEx(); - const_arr3_double Ey = EMf->getEy(); - const_arr3_double Ez = EMf->getEz(); - const_arr3_double Bx = EMf->getBx(); - const_arr3_double By = EMf->getBy(); - const_arr3_double Bz = EMf->getBz(); - #endif const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); - #if 0 - for(int i=0;i Date: Tue, 15 Oct 2013 07:06:14 +0200 Subject: [PATCH 062/118] issue #56: mover_PC(): iterate NiterMover times --- particles/Particles3D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 7edbf634..bef9b682 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -341,7 +341,7 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { pfloat vptilde; pfloat wptilde; // calculate the average velocity iteratively - for (int innter = 0; innter < 1; innter++) { + for (int innter = 0; innter < NiterMover; innter++) { // interpolation G-->P const pfloat ixd = floor((xp - xstart) * inv_dx); const pfloat iyd 
= floor((yp - ystart) * inv_dy); From d51cb740ad30adc41b5b8467c26cc54d602a43b8 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 15 Oct 2013 07:25:08 +0200 Subject: [PATCH 063/118] issue #54: TimeTasks now averaged across threads --- fields/EMfields3D.cpp | 18 ++++++---- include/TimeTasks.h | 24 ++++--------- include/ompdefs.h | 15 ++------ utility/TimeTasks.cpp | 81 ++++++++++++++++++++++++++++++++----------- 4 files changed, 81 insertions(+), 57 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index b130ef2d..37104107 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -192,7 +192,7 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : injFieldsFront = new injInfoFields(nxn, nyn, nzn); injFieldsRear = new injInfoFields(nxn, nyn, nzn); - sizeMomentsArray = omp_thread_count(); + sizeMomentsArray = omp_get_max_threads(); moments10Array = new Moments10*[sizeMomentsArray]; for(int i=0;i #else -inline int omp_get_thread_num() { - return 0; -} +inline int omp_get_thread_num() { return 0;} +inline int omp_get_max_threads(){ return 1;} #endif -inline int omp_thread_count() { - int n = 0; - #pragma omp parallel reduction(+:n) - n += 1; - #ifndef _OPENMP // USING_OMP - assert_eq(n,1); - #endif - return n; -} - #endif diff --git a/utility/TimeTasks.cpp b/utility/TimeTasks.cpp index 15a2c578..dd114ffc 100644 --- a/utility/TimeTasks.cpp +++ b/utility/TimeTasks.cpp @@ -34,7 +34,6 @@ void TimeTasks::resetCycle() for(int e=0;e Date: Tue, 15 Oct 2013 11:46:52 +0200 Subject: [PATCH 064/118] implemented CallFinalize input file option --- include/Collective.h | 3 +++ inputfiles/GEM.inp | 2 ++ main/iPic3Dlib.cpp | 5 ++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/Collective.h b/include/Collective.h index 415baa98..382611b6 100644 --- a/include/Collective.h +++ b/include/Collective.h @@ -138,6 +138,7 @@ class Collective int getParticlesOutputCycle()const{ return (ParticlesOutputCycle); } int 
getRestartOutputCycle()const{ return (RestartOutputCycle); } int getDiagnosticsOutputCycle()const{ return (DiagnosticsOutputCycle); } + bool getCallFinalize()const{ return (CallFinalize); } /*! Boundary condition selection for BCFace for the electric field components */ int bcEx[6], bcEy[6], bcEz[6]; @@ -328,6 +329,8 @@ class Collective int RestartOutputCycle; /*! Output for diagnostics */ int DiagnosticsOutputCycle; + /*! Call Finalize() at end of program execution (true by default) */ + bool CallFinalize; }; typedef Collective CollectiveIO; diff --git a/inputfiles/GEM.inp b/inputfiles/GEM.inp index f3eb5aab..f8602707 100644 --- a/inputfiles/GEM.inp +++ b/inputfiles/GEM.inp @@ -149,3 +149,5 @@ w0 = 0.00325 -0.01624 RestartOutputCycle = 4000 # Diagnostics cycle DiagnosticsOutputCycle = 1 +# 1 (true) by default +#CallFinalize = 0 diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 77bd3120..f476f01f 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -360,8 +360,11 @@ void c_Solver::WriteOutput(int cycle) { } void c_Solver::Finalize() { - if (mem_avail == 0) // write the restart only if the simulation finished succesfully + if (mem_avail == 0 // write the restart only if the simulation finished successfully + && col->getCallFinalize()) + { writeRESTART(RestartDirName, myrank, (col->getNcycles() + first_cycle) - 1, ns, mpi, vct, col, grid, EMf, part, 0); + } // stop profiling my_clock->stopTiming(); From 489b6707b0bb6be786a70eab36a6a1043602672b Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 15 Oct 2013 12:14:13 +0200 Subject: [PATCH 065/118] cleaned up mover_PC() and sumMoments() methods --- fields/EMfields3D.cpp | 39 +++-- inputoutput/Collective.cpp | 1 + particles/Particles3D.cpp | 301 +++++++++---------------------------- 3 files changed, 91 insertions(+), 250 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 37104107..972361e2 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -443,23 +443,30 @@ void 
EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop const double eta1 = grid->getYN(iy) - y[i]; const double zeta1 = grid->getZN(iz) - z[i]; const double qi = q[i]; - const double weight000 = qi * xi0 * eta0 * zeta0 * invVOL; - const double weight001 = qi * xi0 * eta0 * zeta1 * invVOL; - const double weight010 = qi * xi0 * eta1 * zeta0 * invVOL; - const double weight011 = qi * xi0 * eta1 * zeta1 * invVOL; - const double weight100 = qi * xi1 * eta0 * zeta0 * invVOL; - const double weight101 = qi * xi1 * eta0 * zeta1 * invVOL; - const double weight110 = qi * xi1 * eta1 * zeta0 * invVOL; - const double weight111 = qi * xi1 * eta1 * zeta1 * invVOL; + const double invVOLqi = invVOL*qi; + const double weight0 = invVOLqi * xi0; + const double weight1 = invVOLqi * xi1; + const double weight00 = weight0*eta0; + const double weight01 = weight0*eta1; + const double weight10 = weight1*eta0; + const double weight11 = weight1*eta1; double weights[8]; - weights[0] = weight000; - weights[1] = weight001; - weights[2] = weight010; - weights[3] = weight011; - weights[4] = weight100; - weights[5] = weight101; - weights[6] = weight110; - weights[7] = weight111; + weights[0] = weight00*zeta0; // weight000 + weights[1] = weight00*zeta1; // weight001 + weights[2] = weight01*zeta0; // weight010 + weights[3] = weight01*zeta1; // weight011 + weights[4] = weight10*zeta0; // weight100 + weights[5] = weight10*zeta1; // weight101 + weights[6] = weight11*zeta0; // weight110 + weights[7] = weight11*zeta1; // weight111 + //weights[0] = xi0 * eta0 * zeta0 * qi * invVOL; // weight000 + //weights[1] = xi0 * eta0 * zeta1 * qi * invVOL; // weight001 + //weights[2] = xi0 * eta1 * zeta0 * qi * invVOL; // weight010 + //weights[3] = xi0 * eta1 * zeta1 * qi * invVOL; // weight011 + //weights[4] = xi1 * eta0 * zeta0 * qi * invVOL; // weight100 + //weights[5] = xi1 * eta0 * zeta1 * qi * invVOL; // weight101 + //weights[6] = xi1 * eta1 * zeta0 * qi * invVOL; // weight110 + //weights[7] = 
xi1 * eta1 * zeta1 * qi * invVOL; // weight111 // add particle to moments { diff --git a/inputoutput/Collective.cpp b/inputoutput/Collective.cpp index 708be195..5f0b7bbe 100644 --- a/inputoutput/Collective.cpp +++ b/inputoutput/Collective.cpp @@ -91,6 +91,7 @@ void Collective::ReadInput(string inputfile) { ParticlesOutputCycle = config.read < int >("ParticlesOutputCycle"); RestartOutputCycle = config.read < int >("RestartOutputCycle"); DiagnosticsOutputCycle = config.read < int >("DiagnosticsOutputCycle", FieldOutputCycle); + CallFinalize = config.read < bool >("CallFinalize", true); } if (RESTART1) { // you are restarting diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index bef9b682..55d98962 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -320,23 +320,23 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { const pfloat dto2 = .5 * dt, qomdt2 = qom * dto2 / c; const pfloat inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; - // don't bother trying to push any particles simultaneously; - // MIC already does vectorization automatically, and trying - // to do it by hand only hurts performance. #pragma omp for // why does single precision make no difference in execution speed? 
//#pragma simd vectorlength(VECTOR_WIDTH) - for (int rest = 0; rest < nop; rest++) { + for (int pidx = 0; pidx < nop; pidx++) { // copy the particle - const pfloat xptilde = x[rest]; - const pfloat yptilde = y[rest]; - const pfloat zptilde = z[rest]; + const pfloat xptilde = x[pidx]; + const pfloat yptilde = y[pidx]; + const pfloat zptilde = z[pidx]; + const pfloat up_orig = u[pidx]; + const pfloat vp_orig = v[pidx]; + const pfloat wp_orig = w[pidx]; pfloat xp = xptilde; pfloat yp = yptilde; pfloat zp = zptilde; - pfloat up = u[rest]; - pfloat vp = v[rest]; - pfloat wp = w[rest]; + pfloat up = up_orig; + pfloat vp = vp_orig; + pfloat wp = wp_orig; pfloat uptilde; pfloat vptilde; pfloat wptilde; @@ -362,15 +362,12 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { if (iz > nzn - 1) iz = nzn - 1; - pfloat xi[2]; - pfloat eta[2]; - pfloat zeta[2]; - xi[0] = xp - grid->get_pfloat_XN(ix-1); - eta[0] = yp - grid->get_pfloat_YN(iy-1); - zeta[0] = zp - grid->get_pfloat_ZN(iz-1); - xi[1] = grid->get_pfloat_XN(ix) - xp; - eta[1] = grid->get_pfloat_YN(iy) - yp; - zeta[1] = grid->get_pfloat_ZN(iz) - zp; + const pfloat xi0 = xp - grid->get_pfloat_XN(ix-1); + const pfloat eta0 = yp - grid->get_pfloat_YN(iy-1); + const pfloat zeta0 = zp - grid->get_pfloat_ZN(iz-1); + const pfloat xi1 = grid->get_pfloat_XN(ix) - xp; + const pfloat eta1 = grid->get_pfloat_YN(iy) - yp; + const pfloat zeta1 = grid->get_pfloat_ZN(iz) - zp; pfloat Exl = 0.0; pfloat Eyl = 0.0; @@ -379,216 +376,52 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { pfloat Byl = 0.0; pfloat Bzl = 0.0; - // MIC refuses to vectorize this ... 
- // - // pfloat weight[2][2][2]; - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) - // weight[ii][jj][kk] = xi[ii] * eta[jj] * zeta[kk] * invVOL; - // for (int ii = 0; ii < 2; ii++) - // for (int jj = 0; jj < 2; jj++) - // for (int kk = 0; kk < 2; kk++) { - // const pfloat Exlp = weight[ii][jj][kk] * Ex.get(ix - ii, iy - jj, iz - kk); - // const pfloat Eylp = weight[ii][jj][kk] * Ey.get(ix - ii, iy - jj, iz - kk); - // const pfloat Ezlp = weight[ii][jj][kk] * Ez.get(ix - ii, iy - jj, iz - kk); - // const pfloat Bxlp = weight[ii][jj][kk] * Bx.get(ix - ii, iy - jj, iz - kk); - // const pfloat Bylp = weight[ii][jj][kk] * By.get(ix - ii, iy - jj, iz - kk); - // const pfloat Bzlp = weight[ii][jj][kk] * Bz.get(ix - ii, iy - jj, iz - kk); - // Exl += Exlp; - // Eyl += Eylp; - // Ezl += Ezlp; - // Bxl += Bxlp; - // Byl += Bylp; - // Bzl += Bzlp; - // } - - // ... so we expand things out instead - // - const pfloat weight000 = xi[0] * eta[0] * zeta[0] * invVOL; - const pfloat weight001 = xi[0] * eta[0] * zeta[1] * invVOL; - const pfloat weight010 = xi[0] * eta[1] * zeta[0] * invVOL; - const pfloat weight011 = xi[0] * eta[1] * zeta[1] * invVOL; - const pfloat weight100 = xi[1] * eta[0] * zeta[0] * invVOL; - const pfloat weight101 = xi[1] * eta[0] * zeta[1] * invVOL; - const pfloat weight110 = xi[1] * eta[1] * zeta[0] * invVOL; - const pfloat weight111 = xi[1] * eta[1] * zeta[1] * invVOL; + pfloat weights[8]; + const pfloat weight0 = invVOL*xi0; + const pfloat weight1 = invVOL*xi1; + const pfloat weight00 = weight0*eta0; + const pfloat weight01 = weight0*eta1; + const pfloat weight10 = weight1*eta0; + const pfloat weight11 = weight1*eta1; + weights[0] = weight00*zeta0; // weight000 + weights[1] = weight00*zeta1; // weight001 + weights[2] = weight01*zeta0; // weight010 + weights[3] = weight01*zeta1; // weight011 + weights[4] = weight10*zeta0; // weight100 + weights[5] = weight10*zeta1; // weight101 + weights[6] = 
weight11*zeta0; // weight110 + weights[7] = weight11*zeta1; // weight111 + //weights[0] = xi0 * eta0 * zeta0 * qi * invVOL; // weight000 + //weights[1] = xi0 * eta0 * zeta1 * qi * invVOL; // weight001 + //weights[2] = xi0 * eta1 * zeta0 * qi * invVOL; // weight010 + //weights[3] = xi0 * eta1 * zeta1 * qi * invVOL; // weight011 + //weights[4] = xi1 * eta0 * zeta0 * qi * invVOL; // weight100 + //weights[5] = xi1 * eta0 * zeta1 * qi * invVOL; // weight101 + //weights[6] = xi1 * eta1 * zeta0 * qi * invVOL; // weight110 + //weights[7] = xi1 * eta1 * zeta1 * qi * invVOL; // weight111 // creating these aliases seems to accelerate this method by about 30% // on the Xeon host, processor, suggesting deficiency in the optimizer. // - arr1_pfloat_get field000 = fieldForPcls[ix ][iy ][iz ]; - arr1_pfloat_get field001 = fieldForPcls[ix ][iy ][iz-1]; - arr1_pfloat_get field010 = fieldForPcls[ix ][iy-1][iz ]; - arr1_pfloat_get field011 = fieldForPcls[ix ][iy-1][iz-1]; - arr1_pfloat_get field100 = fieldForPcls[ix-1][iy ][iz ]; - arr1_pfloat_get field101 = fieldForPcls[ix-1][iy ][iz-1]; - arr1_pfloat_get field110 = fieldForPcls[ix-1][iy-1][iz ]; - arr1_pfloat_get field111 = fieldForPcls[ix-1][iy-1][iz-1]; - // - #if 0 // (takes same time as other order) - Bxl += weight000 * field000[0]; - Bxl += weight001 * field001[0]; - Bxl += weight010 * field010[0]; - Bxl += weight011 * field011[0]; - Bxl += weight100 * field100[0]; - Bxl += weight101 * field101[0]; - Bxl += weight110 * field110[0]; - Bxl += weight111 * field111[0]; - Byl += weight000 * field000[1]; - Byl += weight001 * field001[1]; - Byl += weight010 * field010[1]; - Byl += weight011 * field011[1]; - Byl += weight100 * field100[1]; - Byl += weight101 * field101[1]; - Byl += weight110 * field110[1]; - Byl += weight111 * field111[1]; - Bzl += weight000 * field000[2]; - Bzl += weight001 * field001[2]; - Bzl += weight010 * field010[2]; - Bzl += weight011 * field011[2]; - Bzl += weight100 * field100[2]; - Bzl += weight101 * 
field101[2]; - Bzl += weight110 * field110[2]; - Bzl += weight111 * field111[2]; - Exl += weight000 * field000[3]; - Exl += weight001 * field001[3]; - Exl += weight010 * field010[3]; - Exl += weight011 * field011[3]; - Exl += weight100 * field100[3]; - Exl += weight101 * field101[3]; - Exl += weight110 * field110[3]; - Exl += weight111 * field111[3]; - Eyl += weight000 * field000[4]; - Eyl += weight001 * field001[4]; - Eyl += weight010 * field010[4]; - Eyl += weight011 * field011[4]; - Eyl += weight100 * field100[4]; - Eyl += weight101 * field101[4]; - Eyl += weight110 * field110[4]; - Eyl += weight111 * field111[4]; - Ezl += weight000 * field000[5]; - Ezl += weight001 * field001[5]; - Ezl += weight010 * field010[5]; - Ezl += weight011 * field011[5]; - Ezl += weight100 * field100[5]; - Ezl += weight101 * field101[5]; - Ezl += weight110 * field110[5]; - Ezl += weight111 * field111[5]; - #endif - - Bxl += weight000 * field000[0]; - Byl += weight000 * field000[1]; - Bzl += weight000 * field000[2]; - Exl += weight000 * field000[3]; - Eyl += weight000 * field000[4]; - Ezl += weight000 * field000[5]; - - Bxl += weight001 * field001[0]; - Byl += weight001 * field001[1]; - Bzl += weight001 * field001[2]; - Exl += weight001 * field001[3]; - Eyl += weight001 * field001[4]; - Ezl += weight001 * field001[5]; - - Bxl += weight010 * field010[0]; - Byl += weight010 * field010[1]; - Bzl += weight010 * field010[2]; - Exl += weight010 * field010[3]; - Eyl += weight010 * field010[4]; - Ezl += weight010 * field010[5]; - - Bxl += weight011 * field011[0]; - Byl += weight011 * field011[1]; - Bzl += weight011 * field011[2]; - Exl += weight011 * field011[3]; - Eyl += weight011 * field011[4]; - Ezl += weight011 * field011[5]; - - Bxl += weight100 * field100[0]; - Byl += weight100 * field100[1]; - Bzl += weight100 * field100[2]; - Exl += weight100 * field100[3]; - Eyl += weight100 * field100[4]; - Ezl += weight100 * field100[5]; - - Bxl += weight101 * field101[0]; - Byl += weight101 * 
field101[1]; - Bzl += weight101 * field101[2]; - Exl += weight101 * field101[3]; - Eyl += weight101 * field101[4]; - Ezl += weight101 * field101[5]; - - Bxl += weight110 * field110[0]; - Byl += weight110 * field110[1]; - Bzl += weight110 * field110[2]; - Exl += weight110 * field110[3]; - Eyl += weight110 * field110[4]; - Ezl += weight110 * field110[5]; - - Bxl += weight111 * field111[0]; - Byl += weight111 * field111[1]; - Bzl += weight111 * field111[2]; - Exl += weight111 * field111[3]; - Eyl += weight111 * field111[4]; - Ezl += weight111 * field111[5]; - - #if 0 - Bxl += weight000 * Bx[ix][iy][iz]; - Bxl += weight000 * Bx[ix][iy][iz]; - Bxl += weight001 * Bx[ix][iy][iz - 1]; - Bxl += weight010 * Bx[ix][iy - 1][iz]; - Bxl += weight011 * Bx[ix][iy - 1][iz - 1]; - Bxl += weight100 * Bx[ix - 1][iy][iz]; - Bxl += weight101 * Bx[ix - 1][iy][iz - 1]; - Bxl += weight110 * Bx[ix - 1][iy - 1][iz]; - Bxl += weight111 * Bx[ix - 1][iy - 1][iz - 1]; - // - Byl += weight000 * By[ix][iy][iz]; - Byl += weight001 * By[ix][iy][iz - 1]; - Byl += weight010 * By[ix][iy - 1][iz]; - Byl += weight011 * By[ix][iy - 1][iz - 1]; - Byl += weight100 * By[ix - 1][iy][iz]; - Byl += weight101 * By[ix - 1][iy][iz - 1]; - Byl += weight110 * By[ix - 1][iy - 1][iz]; - Byl += weight111 * By[ix - 1][iy - 1][iz - 1]; - // - Bzl += weight000 * Bz[ix][iy][iz]; - Bzl += weight001 * Bz[ix][iy][iz - 1]; - Bzl += weight010 * Bz[ix][iy - 1][iz]; - Bzl += weight011 * Bz[ix][iy - 1][iz - 1]; - Bzl += weight100 * Bz[ix - 1][iy][iz]; - Bzl += weight101 * Bz[ix - 1][iy][iz - 1]; - Bzl += weight110 * Bz[ix - 1][iy - 1][iz]; - Bzl += weight111 * Bz[ix - 1][iy - 1][iz - 1]; - // - Exl += weight000 * Ex[ix][iy][iz]; - Exl += weight001 * Ex[ix][iy][iz - 1]; - Exl += weight010 * Ex[ix][iy - 1][iz]; - Exl += weight011 * Ex[ix][iy - 1][iz - 1]; - Exl += weight100 * Ex[ix - 1][iy][iz]; - Exl += weight101 * Ex[ix - 1][iy][iz - 1]; - Exl += weight110 * Ex[ix - 1][iy - 1][iz]; - Exl += weight111 * Ex[ix - 1][iy - 1][iz - 1]; 
- // - Eyl += weight000 * Ey[ix][iy][iz]; - Eyl += weight001 * Ey[ix][iy][iz - 1]; - Eyl += weight010 * Ey[ix][iy - 1][iz]; - Eyl += weight011 * Ey[ix][iy - 1][iz - 1]; - Eyl += weight100 * Ey[ix - 1][iy][iz]; - Eyl += weight101 * Ey[ix - 1][iy][iz - 1]; - Eyl += weight110 * Ey[ix - 1][iy - 1][iz]; - Eyl += weight111 * Ey[ix - 1][iy - 1][iz - 1]; - // - Ezl += weight000 * Ez[ix][iy][iz]; - Ezl += weight001 * Ez[ix][iy][iz - 1]; - Ezl += weight010 * Ez[ix][iy - 1][iz]; - Ezl += weight011 * Ez[ix][iy - 1][iz - 1]; - Ezl += weight100 * Ez[ix - 1][iy][iz]; - Ezl += weight101 * Ez[ix - 1][iy][iz - 1]; - Ezl += weight110 * Ez[ix - 1][iy - 1][iz]; - Ezl += weight111 * Ez[ix - 1][iy - 1][iz - 1]; - #endif + arr1_pfloat_get field_components[8]; + field_components[0] = fieldForPcls[ix ][iy ][iz ]; // field000 + field_components[1] = fieldForPcls[ix ][iy ][iz-1]; // field001 + field_components[2] = fieldForPcls[ix ][iy-1][iz ]; // field010 + field_components[3] = fieldForPcls[ix ][iy-1][iz-1]; // field011 + field_components[4] = fieldForPcls[ix-1][iy ][iz ]; // field100 + field_components[5] = fieldForPcls[ix-1][iy ][iz-1]; // field101 + field_components[6] = fieldForPcls[ix-1][iy-1][iz ]; // field110 + field_components[7] = fieldForPcls[ix-1][iy-1][iz-1]; // field111 + + for(int c=0; c<8; c++) + { + Bxl += weights[c] * field_components[c][0]; + Byl += weights[c] * field_components[c][1]; + Bzl += weights[c] * field_components[c][2]; + Exl += weights[c] * field_components[c][3]; + Eyl += weights[c] * field_components[c][4]; + Ezl += weights[c] * field_components[c][5]; + } // end interpolation const pfloat omdtsq = qomdt2 * qomdt2 * (Bxl * Bxl + Byl * Byl + Bzl * Bzl); @@ -608,18 +441,18 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { zp = zptilde + wptilde * dto2; } // end of iteration // update the final position and velocity - up = 2.0 * uptilde - u[rest]; - vp = 2.0 * vptilde - v[rest]; - wp = 2.0 * wptilde - w[rest]; + up = 2.0 * 
uptilde - up_orig; + vp = 2.0 * vptilde - vp_orig; + wp = 2.0 * wptilde - wp_orig; xp = xptilde + uptilde * dt; yp = yptilde + vptilde * dt; zp = zptilde + wptilde * dt; - x[rest] = xp; - y[rest] = yp; - z[rest] = zp; - u[rest] = up; - v[rest] = vp; - w[rest] = wp; + x[pidx] = xp; + y[pidx] = yp; + z[pidx] = zp; + u[pidx] = up; + v[pidx] = vp; + w[pidx] = wp; } // END OF ALL THE PARTICLES } From e1e32ae8bacb6ad68680d1e7a154d7b01b095410 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 17 Oct 2013 04:09:08 +0200 Subject: [PATCH 066/118] mover_PC: renamed vars, used Om:=B*q*dt/(2*m*c) to reduce multiplications --- particles/Particles3D.cpp | 95 +++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 50 deletions(-) diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 55d98962..5f7bb915 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -318,34 +318,31 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { } const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); - const pfloat dto2 = .5 * dt, qomdt2 = qom * dto2 / c; + const pfloat dto2 = .5 * dt, qdto2mc = qom * dto2 / c; const pfloat inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; #pragma omp for // why does single precision make no difference in execution speed? 
//#pragma simd vectorlength(VECTOR_WIDTH) for (int pidx = 0; pidx < nop; pidx++) { // copy the particle - const pfloat xptilde = x[pidx]; - const pfloat yptilde = y[pidx]; - const pfloat zptilde = z[pidx]; - const pfloat up_orig = u[pidx]; - const pfloat vp_orig = v[pidx]; - const pfloat wp_orig = w[pidx]; - pfloat xp = xptilde; - pfloat yp = yptilde; - pfloat zp = zptilde; - pfloat up = up_orig; - pfloat vp = vp_orig; - pfloat wp = wp_orig; - pfloat uptilde; - pfloat vptilde; - pfloat wptilde; + const pfloat xorig = x[pidx]; + const pfloat yorig = y[pidx]; + const pfloat zorig = z[pidx]; + const pfloat uorig = u[pidx]; + const pfloat vorig = v[pidx]; + const pfloat worig = w[pidx]; + pfloat xavg = xorig; + pfloat yavg = yorig; + pfloat zavg = zorig; + pfloat uavg; + pfloat vavg; + pfloat wavg; // calculate the average velocity iteratively for (int innter = 0; innter < NiterMover; innter++) { // interpolation G-->P - const pfloat ixd = floor((xp - xstart) * inv_dx); - const pfloat iyd = floor((yp - ystart) * inv_dy); - const pfloat izd = floor((zp - zstart) * inv_dz); + const pfloat ixd = floor((xavg - xstart) * inv_dx); + const pfloat iyd = floor((yavg - ystart) * inv_dy); + const pfloat izd = floor((zavg - zstart) * inv_dz); int ix = 2 + int (ixd); int iy = 2 + int (iyd); int iz = 2 + int (izd); @@ -362,12 +359,12 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { if (iz > nzn - 1) iz = nzn - 1; - const pfloat xi0 = xp - grid->get_pfloat_XN(ix-1); - const pfloat eta0 = yp - grid->get_pfloat_YN(iy-1); - const pfloat zeta0 = zp - grid->get_pfloat_ZN(iz-1); - const pfloat xi1 = grid->get_pfloat_XN(ix) - xp; - const pfloat eta1 = grid->get_pfloat_YN(iy) - yp; - const pfloat zeta1 = grid->get_pfloat_ZN(iz) - zp; + const pfloat xi0 = xavg - grid->get_pfloat_XN(ix-1); + const pfloat eta0 = yavg - grid->get_pfloat_YN(iy-1); + const pfloat zeta0 = zavg - grid->get_pfloat_ZN(iz-1); + const pfloat xi1 = grid->get_pfloat_XN(ix) - xavg; + const 
pfloat eta1 = grid->get_pfloat_YN(iy) - yavg; + const pfloat zeta1 = grid->get_pfloat_ZN(iz) - zavg; pfloat Exl = 0.0; pfloat Eyl = 0.0; @@ -422,37 +419,35 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { Eyl += weights[c] * field_components[c][4]; Ezl += weights[c] * field_components[c][5]; } + const double Omx = qdto2mc*Bxl; + const double Omy = qdto2mc*Byl; + const double Omz = qdto2mc*Bzl; // end interpolation - const pfloat omdtsq = qomdt2 * qomdt2 * (Bxl * Bxl + Byl * Byl + Bzl * Bzl); - const pfloat denom = 1.0 / (1.0 + omdtsq); + const pfloat omsq = (Omx * Omx + Omy * Omy + Omz * Omz); + const pfloat denom = 1.0 / (1.0 + omsq); // solve the position equation - const pfloat ut = up + qomdt2 * Exl; - const pfloat vt = vp + qomdt2 * Eyl; - const pfloat wt = wp + qomdt2 * Ezl; - const pfloat udotb = ut * Bxl + vt * Byl + wt * Bzl; + const pfloat ut = uorig + qdto2mc * Exl; + const pfloat vt = vorig + qdto2mc * Eyl; + const pfloat wt = worig + qdto2mc * Ezl; + //const pfloat udotb = ut * Bxl + vt * Byl + wt * Bzl; + const pfloat udotOm = ut * Omx + vt * Omy + wt * Omz; // solve the velocity equation - uptilde = (ut + qomdt2 * (vt * Bzl - wt * Byl + qomdt2 * udotb * Bxl)) * denom; - vptilde = (vt + qomdt2 * (wt * Bxl - ut * Bzl + qomdt2 * udotb * Byl)) * denom; - wptilde = (wt + qomdt2 * (ut * Byl - vt * Bxl + qomdt2 * udotb * Bzl)) * denom; - // update position - xp = xptilde + uptilde * dto2; - yp = yptilde + vptilde * dto2; - zp = zptilde + wptilde * dto2; + uavg = (ut + (vt * Omz - wt * Omy + udotOm * Omx)) * denom; + vavg = (vt + (wt * Omx - ut * Omz + udotOm * Omy)) * denom; + wavg = (wt + (ut * Omy - vt * Omx + udotOm * Omz)) * denom; + // update average position + xavg = xorig + uavg * dto2; + yavg = yorig + vavg * dto2; + zavg = zorig + wavg * dto2; } // end of iteration // update the final position and velocity - up = 2.0 * uptilde - up_orig; - vp = 2.0 * vptilde - vp_orig; - wp = 2.0 * wptilde - wp_orig; - xp = xptilde 
+ uptilde * dt; - yp = yptilde + vptilde * dt; - zp = zptilde + wptilde * dt; - x[pidx] = xp; - y[pidx] = yp; - z[pidx] = zp; - u[pidx] = up; - v[pidx] = vp; - w[pidx] = wp; + x[pidx] = xorig + uavg * dt; + y[pidx] = yorig + vavg * dt; + z[pidx] = zorig + wavg * dt; + u[pidx] = 2.0 * uavg - uorig; + v[pidx] = 2.0 * vavg - vorig; + w[pidx] = 2.0 * wavg - worig; } // END OF ALL THE PARTICLES } From ca89d599706cee9ac5a255284054bb08f6eaf916 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 17 Oct 2013 09:42:12 +0200 Subject: [PATCH 067/118] enforcing that field and particle topology coincide (assumed in subsequent execution) --- communication/VCtopology3D.cpp | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/communication/VCtopology3D.cpp b/communication/VCtopology3D.cpp index b88d0878..7b6db581 100644 --- a/communication/VCtopology3D.cpp +++ b/communication/VCtopology3D.cpp @@ -3,6 +3,8 @@ #include "Collective.h" #include "VCtopology3D.h" #include +#include "MPIdata.h" +#include "debug.h" using std::cout; using std::endl; @@ -62,6 +64,11 @@ void VCtopology3D::setup_vctopology(MPI_Comm old_comm) { MPI_Cart_create(old_comm, 3, divisions, periods, reorder, &CART_COMM); // create a matrix with ranks, and neighbours for Particles MPI_Cart_create(old_comm, 3, divisions, periods_P, reorder, &CART_COMM_P); + // Why not the following line instead of the previous? Was + // this written in anticipation that a different number of MPI + // processes would be used for fields versus for particles? + // But the code has not been consistently written this way... 
+ //MPI_Cart_create(CART_COMM, 3, divisions, periods_P, 0, &CART_COMM_P); // field Communicator if (CART_COMM != MPI_COMM_NULL) { MPI_Comm_rank(CART_COMM, &cartesian_rank); @@ -72,21 +79,32 @@ void VCtopology3D::setup_vctopology(MPI_Comm old_comm) { MPI_Cart_shift(CART_COMM, ZDIR, RIGHT, &zleft_neighbor, &zright_neighbor); } else { - // EXCEPTION - cout << "A process is trown away from the new topology for fields. VCtopology3D.h" << endl; + // previous check that nprocs = XLEN*YLEN*ZLEN should prevent reaching this line. + eprintf("A process is thrown away from the new topology for fields."); } // Particles Communicator if (CART_COMM_P != MPI_COMM_NULL) { - MPI_Comm_rank(CART_COMM_P, &cartesian_rank); - MPI_Cart_coords(CART_COMM_P, cartesian_rank, 3, coordinates); + int pcl_coordinates[3]; + int pcl_cartesian_rank; + MPI_Comm_rank(CART_COMM_P, &pcl_cartesian_rank); + MPI_Cart_coords(CART_COMM_P, pcl_cartesian_rank, 3, pcl_coordinates); + + // This seems to be assumed elsewhere in the code. + assert_eq(cartesian_rank, MPIdata::get_rank()); + // should agree + assert_eq(cartesian_rank,pcl_cartesian_rank); + for(int dim=0;dim<3;dim++) + { + assert_eq(coordinates[dim],pcl_coordinates[dim]); + } MPI_Cart_shift(CART_COMM_P, XDIR, RIGHT, &xleft_neighbor_P, &xright_neighbor_P); MPI_Cart_shift(CART_COMM_P, YDIR, RIGHT, &yleft_neighbor_P, &yright_neighbor_P); MPI_Cart_shift(CART_COMM_P, ZDIR, RIGHT, &zleft_neighbor_P, &zright_neighbor_P); } else { - // EXCEPTION - cout << "A process is trown away from the new topology for Particles. VCtopology3D.h" << endl; + // previous check that nprocs = XLEN*YLEN*ZLEN should prevent reaching this line. 
+ eprintf("A process is thrown away from the new topology for Particles."); } } From 78e9e16555ebe0cfe8ef7f322c28d557f929f986 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 17 Oct 2013 09:44:01 +0200 Subject: [PATCH 068/118] print error once only when nxc/XLEN is non-integer --- grids/Grid3DCU.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index fa6bdd49..e4b7d166 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -1,11 +1,11 @@ #include #include "Grid3DCU.h" +#include "MPIdata.h" /*! constructor */ Grid3DCU::Grid3DCU(CollectiveIO * col, VirtualTopology3D * vct) { - // int get_rank(); - // if(!get_rank()) + if(!MPIdata::get_rank()) { fflush(stdout); bool xerror = false; @@ -37,17 +37,18 @@ Grid3DCU::Grid3DCU(CollectiveIO * col, VirtualTopology3D * vct) { invdz = 1.0 / dz; // local grid dimensions and boundaries of active nodes - xStart = vct->getCoordinates(0) * (col->getLx() / (double) vct->getXLEN()); - - xEnd = xStart + (col->getLx() / (double) vct->getXLEN()); - - yStart = vct->getCoordinates(1) * (col->getLy() / (double) vct->getYLEN()); - - yEnd = yStart + (col->getLy() / (double) vct->getYLEN()); - - zStart = vct->getCoordinates(2) * (col->getLz() / (double) vct->getZLEN()); - - zEnd = zStart + (col->getLz() / (double) vct->getZLEN()); + // + const double xWidth = (col->getLx() / (double) vct->getXLEN()); + const double yWidth = (col->getLy() / (double) vct->getYLEN()); + const double zWidth = (col->getLz() / (double) vct->getZLEN()); + // + xStart = vct->getCoordinates(0) * xWidth; + yStart = vct->getCoordinates(1) * yWidth; + zStart = vct->getCoordinates(2) * zWidth; + // + xEnd = xStart + xWidth; + yEnd = yStart + yWidth; + zEnd = zStart + zWidth; // arrays allocation: nodes ---> the first node has index 1, the last has index nxn-2! 
pfloat_node_xcoord = new pfloat[nxn]; From b106afac26df54c48ff267cecfaf8f93764f0f65 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 17 Oct 2013 09:47:25 +0200 Subject: [PATCH 069/118] eliminate Particles abstract base class (cf. iss #18, #41) --- include/Particles.h | 113 +++++++++++++++++----------------- include/Particles3Dcomm.h | 63 +++++++++---------- particles/Particles3Dcomm.cpp | 69 +-------------------- 3 files changed, 86 insertions(+), 159 deletions(-) diff --git a/include/Particles.h b/include/Particles.h index cca553c0..2abe6c4a 100644 --- a/include/Particles.h +++ b/include/Particles.h @@ -21,60 +21,61 @@ developers: Stefano Markidis, Giovanni Lapenta * */ -class Particles { -public: - /** allocate particles */ - virtual void allocate(int species, CollectiveIO * col, VirtualTopology3D * vct, Grid * grid) = 0; - /** interpolation Particle -> grid */ - virtual void interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vct) = 0; - - - /** get X-position array for all the particles */ - virtual double *getXall() const = 0; - /** get Y-position array for all the particles */ - virtual double *getYall() const = 0; - /** get Z-position array for all the particles */ - virtual double *getZall() const = 0; - /** get u (X-velocity) array for all the particles */ - virtual double *getUall() const = 0; - /** get v (Y-velocity) array for all the particles */ - virtual double *getVall() const = 0; - /** get w (Z-velocity) array for all the particles */ - virtual double *getWall() const = 0; - /** get ID array for all the particles */ - virtual long long *getParticleIDall() const = 0; - /**get charge of particle array */ - virtual double *getQall() const = 0; - /** get X-position of particle with label indexPart */ - virtual double getX(int indexPart) const = 0; - /** get Y-position of particle with label indexPart */ - virtual double getY(int indexPart) const = 0; - /** get Z-position of particle with label indexPart */ - virtual double getZ(int indexPart) 
const = 0; - /** get u (X-velocity) of particle with label indexPart */ - virtual double getU(int indexPart) const = 0; - /** get v (Y-velocity) of particle with label indexPart */ - virtual double getV(int indexPart) const = 0; - /** get w (Z-velocity) of particle with label indexPart */ - virtual double getW(int indexPart) const = 0; - /** get ID of particle with label indexPart */ - virtual long long getParticleID(int indexPart) const = 0; - /**get charge of particle with label indexPart */ - virtual double getQ(int indexPart) const = 0; - /** get the number of particles of this subdomain */ - virtual int getNOP() const = 0; - /** return the Kinetic energy */ - virtual double getKe() = 0; - /** return the maximum kinetic energy */ - virtual double getMaxVelocity() = 0; - /** return energy distribution*/ - virtual long long *getVelocityDistribution(int nBins, double maxVel) = 0; - /** retturn the momentum */ - virtual double getP() = 0; - /** Print particles info: positions, velocities */ - virtual void Print(VirtualTopology3D * ptVCT) const = 0; - /** Print the number of particles of this subdomain */ - virtual void PrintNp(VirtualTopology3D * ptVCT) const = 0; - -}; +#include "Particles3Dcomm.h" +//class Particles { +//public: +// /** allocate particles */ +// virtual void allocate(int species, CollectiveIO * col, VirtualTopology3D * vct, Grid * grid) = 0; +// /** interpolation Particle -> grid */ +// virtual void interpP2G(Field * EMf, Grid * grid, VirtualTopology3D * vct) = 0; +// +// +// /** get X-position array for all the particles */ +// virtual double *getXall() const = 0; +// /** get Y-position array for all the particles */ +// virtual double *getYall() const = 0; +// /** get Z-position array for all the particles */ +// virtual double *getZall() const = 0; +// /** get u (X-velocity) array for all the particles */ +// virtual double *getUall() const = 0; +// /** get v (Y-velocity) array for all the particles */ +// virtual double *getVall() const = 0; 
+// /** get w (Z-velocity) array for all the particles */ +// virtual double *getWall() const = 0; +// /** get ID array for all the particles */ +// virtual long long *getParticleIDall() const = 0; +// /**get charge of particle array */ +// virtual double *getQall() const = 0; +// /** get X-position of particle with label indexPart */ +// virtual double getX(int indexPart) const = 0; +// /** get Y-position of particle with label indexPart */ +// virtual double getY(int indexPart) const = 0; +// /** get Z-position of particle with label indexPart */ +// virtual double getZ(int indexPart) const = 0; +// /** get u (X-velocity) of particle with label indexPart */ +// virtual double getU(int indexPart) const = 0; +// /** get v (Y-velocity) of particle with label indexPart */ +// virtual double getV(int indexPart) const = 0; +// /** get w (Z-velocity) of particle with label indexPart */ +// virtual double getW(int indexPart) const = 0; +// /** get ID of particle with label indexPart */ +// virtual long long getParticleID(int indexPart) const = 0; +// /**get charge of particle with label indexPart */ +// virtual double getQ(int indexPart) const = 0; +// /** get the number of particles of this subdomain */ +// virtual int getNOP() const = 0; +// /** return the Kinetic energy */ +// virtual double getKe() = 0; +// /** return the maximum kinetic energy */ +// virtual double getMaxVelocity() = 0; +// /** return energy distribution*/ +// virtual long long *getVelocityDistribution(int nBins, double maxVel) = 0; +// /** retturn the momentum */ +// virtual double getP() = 0; +// /** Print particles info: positions, velocities */ +// virtual void Print(VirtualTopology3D * ptVCT) const = 0; +// /** Print the number of particles of this subdomain */ +// virtual void PrintNp(VirtualTopology3D * ptVCT) const = 0; +// +//}; #endif diff --git a/include/Particles3Dcomm.h b/include/Particles3Dcomm.h index 1e646681..8ebae324 100644 --- a/include/Particles3Dcomm.h +++ 
b/include/Particles3Dcomm.h @@ -16,7 +16,8 @@ developers: Stefano Markidis, Giovanni Lapenta * @version 2.0 * */ -class Particles3Dcomm:public Particles { +class Particles3Dcomm // :public Particles +{ public: /** constructor */ Particles3Dcomm(); @@ -57,40 +58,31 @@ class Particles3Dcomm:public Particles { int maxNpExiting(); /** calculate the weights given the position of particles */ // void calculateWeights(double*** weight, double xp, double yp, double zp,int ix, int iy, int iz, Grid* grid); - /** get X-position array for all the particles */ - double *getXall() const; - /** get Y-position array for all the particles */ - double *getYall() const; - /** get Z-position array for all the particles */ - double *getZall() const; - /** get u (X-velocity) array for all the particles */ - double *getUall() const; - /** get v (Y-velocity) array for all the particles */ - double *getVall() const; - /** get w (Z-velocity) array for all the particles */ - double *getWall() const; - /** get the ID array */ - long long *getParticleIDall() const; - /** get X-position of particle with label indexPart */ - double getX(int indexPart) const; - /** get Y-position of particle with label indexPart */ - double getY(int indexPart) const; - /** get Z-position of particle with label indexPart */ - double getZ(int indexPart) const; - /** get u (X-velocity) of particle with label indexPart */ - double getU(int indexPart) const; - /** get v (Y-velocity) of particle with label indexPart */ - double getV(int indexPart) const; - /** get w (Z-velocity) of particle with label indexPart */ - double getW(int indexPart) const; - /** get ID of particle with label indexPart */ - long long getParticleID(int indexPart) const; - /**get charge of particle with label indexPart */ - double getQ(int indexPart) const; - /** get charge of array for ID particles */ - double *getQall() const; - /** get the number of particles of this subdomain */ - int getNOP() const; + + // inline get accessors + // + double 
*getXall() const { return (x); } + double *getYall() const { return (y); } + double *getZall() const { return (z); } + double *getUall() const { return (u); } + double *getVall() const { return (v); } + double *getWall() const { return (w); } + long long *getParticleIDall() const { return (ParticleID); } + double *getQall() const { return (q); } + // accessors for particle with index indexPart + double getX(int indexPart) const { return (x[indexPart]); } + double getY(int indexPart) const { return (y[indexPart]); } + double getZ(int indexPart) const { return (z[indexPart]); } + double getU(int indexPart) const { return (u[indexPart]); } + double getV(int indexPart) const { return (v[indexPart]); } + double getW(int indexPart) const { return (w[indexPart]); } + long long getParticleID(int indexPart) const + { return (ParticleID[indexPart]); } + double getQ(int indexPart) const { return (q[indexPart]); } + int getNOP() const { return (nop); } + + // computed get access + // /** return the Kinetic energy */ double getKe(); /** return the maximum kinetic energy */ @@ -262,5 +254,6 @@ class Particles3Dcomm:public Particles { double Ninj; }; +typedef Particles3Dcomm Particles; #endif diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 13a443c0..acdccdab 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -809,74 +809,7 @@ int Particles3Dcomm::maxNpExiting() { maxNp = npExitZleft; return (maxNp); } -/** return X-coordinate of particle array */ -double *Particles3Dcomm::getXall() const { - return (x); -} -/** return Y-coordinate of particle array */ -double *Particles3Dcomm::getYall() const { - return (y); -} -/** return Z-coordinate of particle array*/ -double *Particles3Dcomm::getZall() const { - return (z); -} -/** get X-velocity of particle with label indexPart */ -double *Particles3Dcomm::getUall() const { - return (u); -} -/** get Y-velocity of particle with label indexPart */ -double 
*Particles3Dcomm::getVall() const { - return (v); -} -/**get Z-velocity of particle with label indexPart */ -double *Particles3Dcomm::getWall() const { - return (w); -} -/**get ID of particle with label indexPart */ -long long *Particles3Dcomm::getParticleIDall() const { - return (ParticleID); -} -/**get charge of particle with label indexPart */ -double *Particles3Dcomm::getQall() const { - return (q); -} -/** return X-coordinate of particle with index indexPart */ -double Particles3Dcomm::getX(int indexPart) const { - return (x[indexPart]); -} -/** return Y-coordinate of particle with index indexPart */ -double Particles3Dcomm::getY(int indexPart) const { - return (y[indexPart]); -} -/** return Y-coordinate of particle with index indexPart */ -double Particles3Dcomm::getZ(int indexPart) const { - return (z[indexPart]); -} -/** get u (X-velocity) of particle with label indexPart */ -double Particles3Dcomm::getU(int indexPart) const { - return (u[indexPart]); -} -/** get v (Y-velocity) of particle with label indexPart */ -double Particles3Dcomm::getV(int indexPart) const { - return (v[indexPart]); -} -/**get w (Z-velocity) of particle with label indexPart */ -double Particles3Dcomm::getW(int indexPart) const { - return (w[indexPart]); -} -/**get ID of particle with label indexPart */ -long long Particles3Dcomm::getParticleID(int indexPart) const { - return (ParticleID[indexPart]); -} -/**get charge of particle with label indexPart */ -double Particles3Dcomm::getQ(int indexPart) const { - return (q[indexPart]); -} -/** return the number of particles */ -int Particles3Dcomm::getNOP() const { - return (nop); -} + /** return the Kinetic energy */ double Particles3Dcomm::getKe() { double localKe = 0.0; From 64541ffccb4677dac465620b73cc1797ca88472e Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 10 Jan 2014 21:22:21 +0100 Subject: [PATCH 070/118] exclude unused files from ctags file --- scripts/ipic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/scripts/ipic.py b/scripts/ipic.py index 90948c4b..1c9d90ad 100755 --- a/scripts/ipic.py +++ b/scripts/ipic.py @@ -16,7 +16,7 @@ def ipic_ctags(args): # create tags file using ctags create_tags_command = \ - '''find . -name '*.cpp' -or -name '*.h' | xargs ctags --extra=+qf''' + '''find . -name '*.cpp' -or -name '*.h' | grep -v unused | xargs ctags --extra=+qf''' print create_tags_command os.system(create_tags_command) # sort tags file From 4b97b5fbf95fd7a680d2a62448d76866e260e30c Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 10 Jan 2014 21:32:53 +0100 Subject: [PATCH 071/118] vectorized mover and summing moments at cost of sorting serially --- CMakeLists.txt | 2 +- fields/EMfields3D.cpp | 177 ++++++++++++++++++ include/EMfields3D.h | 4 +- include/Grid3DCU.h | 3 + include/Particles3D.h | 2 + include/Particles3Dcomm.h | 112 +++++++++++- include/TimeTasks.h | 3 + include/arraysfwd.h | 14 +- main/Parameters.cpp | 16 ++ main/iPic3Dlib.cpp | 30 +++- particles/Particles3D.cpp | 235 ++++++++++++++++++++++-- particles/Particles3Dcomm.cpp | 326 +++++++++++++++++++++++++++++++++- utility/TimeTasks.cpp | 3 + 13 files changed, 893 insertions(+), 34 deletions(-) create mode 100644 main/Parameters.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c6b9473e..f70f8451 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ file( processtopology/*.cpp solvers/*.cpp utility/*.cpp - main/iPic3Dlib.cpp + main/*.cpp ) # diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 972361e2..00240a67 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -540,6 +540,183 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop } } +void EMfields3D::sumMoments_vectorized( + const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct) +{ + const double inv_dx = grid->get_invdx(); + const double inv_dy = grid->get_invdy(); + const double inv_dz = grid->get_invdz(); + const int nxn = grid->getNXN(); + const int nyn = 
grid->getNYN(); + const int nzn = grid->getNZN(); + const double xstart = grid->getXstart(); + const double ystart = grid->getYstart(); + const double zstart = grid->getZstart(); + #pragma omp parallel + for (int species_idx = 0; species_idx < ns; species_idx++) + { + const Particles3Dcomm& pcls = part[species_idx]; + const int is = pcls.get_ns(); + assert_eq(species_idx,is); + + double const*const x = pcls.getXall(); + double const*const y = pcls.getYall(); + double const*const z = pcls.getZall(); + double const*const u = pcls.getUall(); + double const*const v = pcls.getVall(); + double const*const w = pcls.getWall(); + double const*const q = pcls.getQall(); + + const int nop = pcls.getNOP(); + #pragma omp master + timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); + Moments10& speciesMoments10 = fetch_moments10Array(0); + speciesMoments10.set_to_zero(); + arr4_double moments = speciesMoments10.fetch_arr(); + #pragma omp for collapse(2) // schedule(static) + for(int cx=0;cxgetCartesian_rank() == 0) diff --git a/include/EMfields3D.h b/include/EMfields3D.h index 2aaef7c4..ab0cc1df 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -119,8 +119,10 @@ class EMfields3D // :public Field void set_fieldForPcls(); /*! communicate ghost for grid -> Particles interpolation */ void communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, VirtualTopology3D * vct); - void sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); + /*! sum moments (interp_P2G) versions */ void sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); + void sumMoments_vectorized(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); + void sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); /*! add accumulated moments to the moments for a given species */ //void addToSpeciesMoments(const TenMoments & in, int is); /*! 
add an amount of charge density to charge density field at node X,Y,Z */ diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index b7eb7a6b..03b56c1d 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -163,6 +163,9 @@ class Grid3DCU // :public Grid double getDX() { return (dx); } double getDY() { return (dy); } double getDZ() { return (dz); } + double get_invdx() { return (invdx); } + double get_invdy() { return (invdy); } + double get_invdz() { return (invdz); } // // coordinate accessors // diff --git a/include/Particles3D.h b/include/Particles3D.h index 74cfbf37..ccd210b4 100644 --- a/include/Particles3D.h +++ b/include/Particles3D.h @@ -58,6 +58,8 @@ class Particles3D:public Particles3Dcomm { void mover_explicit(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** mover with a Predictor-Corrector Scheme */ void mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf); + /** vectorized version of mover_PC **/ + void mover_PC_vectorized(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** communicate particle after moving them */ int communicate_particles(VirtualTopology3D * vct); /** relativistic mover with a Predictor-Corrector scheme */ diff --git a/include/Particles3Dcomm.h b/include/Particles3Dcomm.h index 8ebae324..5d40cca8 100644 --- a/include/Particles3Dcomm.h +++ b/include/Particles3Dcomm.h @@ -59,6 +59,53 @@ class Particles3Dcomm // :public Particles /** calculate the weights given the position of particles */ // void calculateWeights(double*** weight, double xp, double yp, double zp,int ix, int iy, int iz, Grid* grid); + /*! sort particles for vectorized push (needs to be parallelized) */ + void sort_particles_serial(Grid * grid, VirtualTopology3D * vct); + /*! 
sort particles with respect to provided position data */ + void sort_particles_serial( + pfloat *xpos, pfloat *ypos, pfloat *zpos, + Grid * grid, VirtualTopology3D * vct); + void get_safe_cell_for_pos( + int& cx, int& cy, int& cz, + pfloat xpos, pfloat ypos, pfloat zpos) + { + // xstart is left edge of domain excluding ghost cells + // cx=0 for ghost cell layer. + cx = 1 + int(floor((xpos - xstart) * inv_dx)); + cy = 1 + int(floor((ypos - ystart) * inv_dy)); + cz = 1 + int(floor((zpos - zstart) * inv_dz)); + // + // if the cell is outside the domain, then treat it as + // in the nearest ghost cell. + // + if (cx < 0) cx = 0; + if (cy < 0) cy = 0; + if (cz < 0) cz = 0; + // number of cells in x direction including ghosts is nxc + if (cx >= nxc) cx = nxc-1; + if (cy >= nyc) cy = nyc-1; + if (cz >= nzc) cz = nzc-1; + } + + /*! version that assumes particle is in domain */ + void get_cell_for_pos_in_domain( + int& cx, int& cy, int& cz, + pfloat xpos, pfloat ypos, pfloat zpos) + { + // xstart is left edge of domain excluding ghost cells + // cx=0 for ghost cell layer. 
+ cx = 1 + int(floor((xpos - xstart) * inv_dx)); + cy = 1 + int(floor((ypos - ystart) * inv_dy)); + cz = 1 + int(floor((zpos - zstart) * inv_dz)); + // + assert_le(0,cx); + assert_le(0,cy); + assert_le(0,cz); + assert_le(cx,nxc); + assert_le(cy,nyc); + assert_le(cz,nzc); + } + // inline get accessors // double *getXall() const { return (x); } @@ -99,6 +146,10 @@ class Particles3Dcomm // :public Particles public: // accessors int get_ns()const{return ns;} + int get_numpcls_in_bucket(int cx, int cy, int cz)const + { return (*numpcls_in_bucket)[cx][cy][cz]; } + int get_bucket_offset(int cx, int cy, int cz)const + { return (*bucket_offset)[cx][cy][cz]; } protected: /** number of this species */ @@ -133,7 +184,10 @@ class Particles3Dcomm // :public Particles double v0; /** w0 Drift velocity - Direction Z */ double w0; - /** Positions arra - X component */ + + // particles data + // + /** Positions array - X component */ double *x; /** Positions array - Y component */ double *y; @@ -145,16 +199,58 @@ class Particles3Dcomm // :public Particles double *v; /** Velocities array - Z component */ double *w; + /** Charge array */ + double *q; /** TrackParticleID */ bool TrackParticleID; /** ParticleID */ long long *ParticleID; + /** Average position data (used during particle push) **/ + double *xavg; + double *yavg; + double *zavg; + + // structures for sorting particles + // + // alternate storage for sorting particles + // + double *xtmp; + double *ytmp; + double *ztmp; + double *utmp; + double *vtmp; + double *wtmp; + double *qtmp; + long long *ParticleIDtmp; + double *xavgtmp; + double *yavgtmp; + double *zavgtmp; + //int *xcell; + //int *ycell; + //int *zcell; + + // references for buckets + // + array3_int* numpcls_in_bucket; + array3_int* numpcls_in_bucket_now; // accumulator used during sorting + //array3_int* bucket_size; // maximum number of particles in bucket + array3_int* bucket_offset; + // + // bucket totals per thread + // + //int num_threads; + //array3_int* 
numpcls_in_bucket_thr; + //arr3_int fetch_numpcls_in_bucket_thr(int i) + //{ + // assert_le(0,i); + // assert_lt(i,num_threads); + // return *(numpcls_in_bucket_thr[i]); + //}; + /** rank of processor in which particle is created (for ID) */ int BirthRank[2]; /** number of variables to be stored in buffer for communication for each particle */ int nVar; - /** Charge array */ - double *q; /** Simulation domain lengths */ double xstart, xend, ystart, yend, zstart, zend, invVOL; /** time step */ @@ -167,9 +263,10 @@ class Particles3Dcomm // :public Particles double Lz; /** grid spacings */ double dx, dy, dz; - /** number of grid - nodes */ + /** number of grid nodes */ int nxn, nyn, nzn; + /** number of grid cells */ + int nxc, nyc, nzc; /** buffers for communication */ /** size of sending buffers for exiting particles, DEFINED IN METHOD "COMMUNICATE" */ int buffer_size; @@ -252,6 +349,11 @@ class Particles3Dcomm // :public Particles double Q_removed; /** density of the injection of the particles */ double Ninj; + + // convenience values from grid + double inv_dx; + double inv_dy; + double inv_dz; }; typedef Particles3Dcomm Particles; diff --git a/include/TimeTasks.h b/include/TimeTasks.h index fba54430..3ac67b23 100644 --- a/include/TimeTasks.h +++ b/include/TimeTasks.h @@ -27,8 +27,11 @@ class TimeTasks PARTICLES, LAST, // no more exclusive tasks BFIELD, + MOMENT_PCL_SORTING, MOMENT_ACCUMULATION, MOMENT_REDUCTION, + MOVER_PCL_SORTING, + MOVER_PCL_MOVING, NUMBER_OF_TASKS // this line should be last }; diff --git a/include/arraysfwd.h b/include/arraysfwd.h index 889c950d..706c057d 100644 --- a/include/arraysfwd.h +++ b/include/arraysfwd.h @@ -33,12 +33,10 @@ namespace iPic3D // - so that they can be redefined according to the user's // preferred array implementation. 
// -//typedef array_ref1 intArr1; -//typedef array_ref2 intArr2; -//typedef array_ref3 intArr3; -//typedef array_ref4 intArr4; -//typedef const_array_ref1 arr1_double; -//typedef const_array_ref2 arr2_double; +typedef iPic3D::array_ref1 arr1_int; +typedef iPic3D::array_ref2 arr2_int; +typedef iPic3D::array_ref3 arr3_int; +typedef iPic3D::array_ref4 arr4_int; // typedef iPic3D::const_array_ref3 const_arr3_double; typedef iPic3D::const_array_ref4 const_arr4_double; @@ -47,6 +45,10 @@ typedef iPic3D::array_ref1 arr1_double; typedef iPic3D::array_ref2 arr2_double; typedef iPic3D::array_ref3 arr3_double; typedef iPic3D::array_ref4 arr4_double; +typedef iPic3D::array1 array1_int; +typedef iPic3D::array2 array2_int; +typedef iPic3D::array3 array3_int; +typedef iPic3D::array4 array4_int; typedef iPic3D::array1 array1_double; typedef iPic3D::array2 array2_double; typedef iPic3D::array3 array3_double; diff --git a/main/Parameters.cpp b/main/Parameters.cpp new file mode 100644 index 00000000..740586de --- /dev/null +++ b/main/Parameters.cpp @@ -0,0 +1,16 @@ +#include "Parameters.h" + +using namespace Parameters; + +static bool SORTING_PARTICLES; + +void Parameters::init_parameters() +{ + SORTING_PARTICLES = get_VECTORIZE_MOMENTS() || get_VECTORIZE_MOVER(); +} + +bool Parameters::get_SORTING_PARTICLES() { return SORTING_PARTICLES; } +bool Parameters::get_VECTORIZE_MOMENTS() { return false; } +bool Parameters::get_VECTORIZE_MOVER() { return false; } +// this will also return true if we communicate particles per iteration +bool Parameters::get_USING_XAVG() { return get_VECTORIZE_MOVER(); } diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index f476f01f..891a16af 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -3,6 +3,7 @@ #include "TimeTasks.h" #include "ipicdefs.h" #include "debug.h" +#include "Parameters.h" using namespace iPic3D; MPIdata* iPic3D::c_Solver::mpi=0; @@ -16,6 +17,7 @@ int c_Solver::Init(int argc, char **argv) { // initialized MPI environment // 
nprocs = number of processors // myrank = rank of tha process*/ + Parameters::init_parameters(); mpi = &MPIdata::instance(); nprocs = MPIdata::get_nprocs(); myrank = MPIdata::get_rank(); @@ -178,7 +180,27 @@ void c_Solver::CalculateMoments() { EMf->updateInfoFields(grid,vct,col); EMf->setZeroDensities(); // set to zero the densities - EMf->sumMoments(part, grid, vct); + if(Parameters::get_SORTING_PARTICLES()) + { + // sort particles + #pragma omp master + timeTasks_begin_task(TimeTasks::MOMENT_PCL_SORTING); + for(int species_idx=0; species_idxsumMoments_vectorized(part, grid, vct); + } + else + { + EMf->sumMoments(part, grid, vct); + } //for (int i = 0; i < ns; i++) //{ // EMf->sumMomentsOld(part[i], grid, vct); @@ -228,7 +250,11 @@ bool c_Solver::ParticlesMover() { // #pragma omp task inout(part[i]) in(grid) target_device(booster) // // should merely pass EMf->get_fieldForPcls() rather than EMf. - part[i].mover_PC(grid, vct, EMf); // use the Predictor Corrector scheme + // use the Predictor Corrector scheme to move particles + if(Parameters::get_VECTORIZE_MOVER()) + part[i].mover_PC_vectorized(grid, vct, EMf); + else + part[i].mover_PC(grid, vct, EMf); } for (int i = 0; i < ns; i++) // move each species { diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 5f7bb915..1daed3fa 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -26,7 +26,7 @@ developers: Stefano Markidis, Giovanni Lapenta #include "Particles3D.h" - +#include "debug.h" #include "hdf5.h" #include @@ -318,9 +318,10 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { } const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + #pragma omp master + timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); const pfloat dto2 = .5 * dt, qdto2mc = qom * dto2 / c; - const pfloat inv_dx = 1.0 / dx, inv_dy = 1.0 / dy, inv_dz = 1.0 / dz; - #pragma omp for + #pragma omp for schedule(static) // why does single precision make no difference in 
execution speed? //#pragma simd vectorlength(VECTOR_WIDTH) for (int pidx = 0; pidx < nop; pidx++) { @@ -343,21 +344,23 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { const pfloat ixd = floor((xavg - xstart) * inv_dx); const pfloat iyd = floor((yavg - ystart) * inv_dy); const pfloat izd = floor((zavg - zstart) * inv_dz); - int ix = 2 + int (ixd); - int iy = 2 + int (iyd); - int iz = 2 + int (izd); - if (ix < 1) - ix = 1; - if (iy < 1) - iy = 1; - if (iz < 1) - iz = 1; - if (ix > nxn - 1) - ix = nxn - 1; - if (iy > nyn - 1) - iy = nyn - 1; - if (iz > nzn - 1) - iz = nzn - 1; + // interface of index to right of cell + int ix = 2 + int(ixd); + int iy = 2 + int(iyd); + int iz = 2 + int(izd); + + // use field data of closest cell in domain + // + if (ix < 1) ix = 1; + if (iy < 1) iy = 1; + if (iz < 1) iz = 1; + if (ix > nxc) ix = nxc; + if (iy > nyc) iy = nyc; + if (iz > nzc) iz = nzc; + // index of cell of particle; + //const int cx = ix - 1; + //const int cy = iy - 1; + //const int cz = iz - 1; const pfloat xi0 = xavg - grid->get_pfloat_XN(ix-1); const pfloat eta0 = yavg - grid->get_pfloat_YN(iy-1); @@ -449,6 +452,202 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { v[pidx] = 2.0 * vavg - vorig; w[pidx] = 2.0 * wavg - worig; } // END OF ALL THE PARTICLES + #pragma omp master + timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); +} + +/** mover with a Predictor-Corrector scheme */ +void Particles3D::mover_PC_vectorized( + Grid * grid, VirtualTopology3D * vct, Field * EMf) +{ + assert_eq(nxc,nxn-1); + assert_eq(nyc,nyn-1); + assert_eq(nzc,nzn-1); + #pragma omp master + if (vct->getCartesian_rank() == 0) { + cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; + } + const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + + // initialize average positions + #pragma omp for schedule(static) + for(int pidx = 0; pidx < nop; pidx++) + { + xavg[pidx] = x[pidx]; + 
yavg[pidx] = y[pidx]; + zavg[pidx] = z[pidx]; + } + + const pfloat dto2 = .5 * dt, qdto2mc = qom * dto2 / c; + for(int niter=1; niter<=NiterMover; niter++) + { + // sort particles based on the time-averaged position + if(niter>1) // on first iteration already was sorted to sum moments + { + #pragma omp master + timeTasks_begin_task(TimeTasks::MOVER_PCL_SORTING); + sort_particles_serial(xavg, yavg, zavg, grid,vct); + #pragma omp master + timeTasks_end_task(TimeTasks::MOVER_PCL_SORTING); + } + + #pragma omp master + timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); + // move particles in parallel + // + // iterate over mesh cells + //const int ncells=nxc*nyc*nzc; + //int *numpcls_in_bucket_1d = &numpcls_in_bucket[0][0][0]; + //int *bucket_offset_1d = &bucket_offset[0][0][0]; + int serial_pidx = 0; + #pragma omp for collapse(2) // schedule(static) + for(int cx=0;cx #include #include "asserts.h" +#include // for swap #include "VirtualTopology3D.h" #include "VCtopology3D.h" #include "CollectiveIO.h" @@ -24,6 +25,7 @@ developers: Stefano Markidis, Giovanni Lapenta. 
#include "ompdefs.h" #include "Particles3Dcomm.h" +#include "Parameters.h" #include "hdf5.h" #include @@ -47,7 +49,7 @@ using std::endl; */ /** constructor */ -Particles3Dcomm::Particles3Dcomm() { +Particles3Dcomm::Particles3Dcomm(){ // see allocate(int species, CollectiveIO* col, VirtualTopology3D* vct, Grid* grid) } @@ -60,6 +62,22 @@ Particles3Dcomm::~Particles3Dcomm() { delete[]v; delete[]w; delete[]q; + delete[]ParticleID; + delete[]xavg; + delete[]yavg; + delete[]zavg; + // deallocate alternate storage + delete[]xtmp; + delete[]ytmp; + delete[]ztmp; + delete[]utmp; + delete[]vtmp; + delete[]wtmp; + delete[]qtmp; + delete[]ParticleIDtmp; + delete[]xavgtmp; + delete[]yavgtmp; + delete[]zavgtmp; // deallocate buffers delete[]b_X_RIGHT; delete[]b_X_LEFT; @@ -67,6 +85,9 @@ Particles3Dcomm::~Particles3Dcomm() { delete[]b_Y_LEFT; delete[]b_Z_RIGHT; delete[]b_Z_LEFT; + delete numpcls_in_bucket; + delete numpcls_in_bucket_now; + delete bucket_offset; } /** constructors fo a single species*/ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3D * vct, Grid * grid) { @@ -112,10 +133,19 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 dx = grid->getDX(); dy = grid->getDY(); dz = grid->getDZ(); + inv_dx = 1/dx; + inv_dy = 1/dy; + inv_dz = 1/dz; nxn = grid->getNXN(); nyn = grid->getNYN(); nzn = grid->getNZN(); + nxc = grid->getNXC(); + nyc = grid->getNYC(); + nzc = grid->getNZC(); + assert_eq(nxc,nxn-1); + assert_eq(nyc,nyn-1); + assert_eq(nzc,nzn-1); invVOL = grid->getInvVOL(); // info from VirtualTopology3D cVERBOSE = vct->getcVERBOSE(); @@ -131,6 +161,20 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 bcPfaceYleft = col->getBcPfaceYleft(); bcPfaceZright = col->getBcPfaceZright(); bcPfaceZleft = col->getBcPfaceZleft(); + // + // allocate arrays for sorting particles + // + numpcls_in_bucket = new array3_int(nxc,nyc,nzc); + numpcls_in_bucket_now = new 
array3_int(nxc,nyc,nzc); + bucket_offset = new array3_int(nxc,nyc,nzc); + //num_threads = omp_get_max_threads(); + //numpcls_in_bucket_thr = (arr3_int*)malloc(sizeof(void*)*num_threads); + //for(int i=0; igetCartesian_rank(); if (vct->getNprocs() > 1) BirthRank[1] = (int) ceil(log10((double) (vct->getNprocs()))); // Number of digits needed for # of process in ID @@ -887,3 +973,241 @@ void Particles3Dcomm::PrintNp(VirtualTopology3D * ptVCT) const { cout << "Subgrid (" << ptVCT->getCoordinates(0) << "," << ptVCT->getCoordinates(1) << "," << ptVCT->getCoordinates(2) << ")" << endl; cout << endl; } + +/***** particle sorting routines *****/ + +void Particles3Dcomm::sort_particles_serial(Grid * grid, VirtualTopology3D * vct) +{ + sort_particles_serial(x,y,z, grid,vct); +} + +// need to sort and communicate particles after each iteration +void Particles3Dcomm::sort_particles_serial( + double *xpos, double *ypos, double *zpos, + Grid * grid, VirtualTopology3D * vct) +{ + #pragma omp critical (sort_particles_serial) + { + numpcls_in_bucket->setall(0); + // iterate through particles and count where they will go + for (int pidx = 0; pidx < nop; pidx++) + { + // get the cell indices of the particle + // + int cx,cy,cz; + get_safe_cell_for_pos(cx,cy,cz,xpos[pidx],ypos[pidx],zpos[pidx]); + // + // is it better just to recompute this? 
+ // + //xcell[pidx]=cx; + //ycell[pidx]=cy; + //zcell[pidx]=cz; + + // increment the number of particles in bucket of this particle + (*numpcls_in_bucket)[cx][cy][cz]++; + } + + // compute prefix sum to determine initial position + // of each bucket (could parallelize this) + // + int accpcls=0; + for(int cx=0;cxsetall(0); + // put the particles where they are supposed to go + for (int pidx = 0; pidx < nop; pidx++) + { + // get the cell indices of the particle + // + int cx,cy,cz; + get_safe_cell_for_pos(cx,cy,cz,xpos[pidx],ypos[pidx],zpos[pidx]); + // + //cx = xcell[pidx]; + //cy = ycell[pidx]; + //cz = zcell[pidx]; + + // compute where the data should go + const int numpcls_now = (*numpcls_in_bucket_now)[cx][cy][cz]++; + const int outpidx = (*bucket_offset)[cx][cy][cz] + numpcls_now; + + // copy particle data to new location + // + xtmp[outpidx] = x[pidx]; + ytmp[outpidx] = y[pidx]; + ztmp[outpidx] = z[pidx]; + utmp[outpidx] = u[pidx]; + vtmp[outpidx] = v[pidx]; + wtmp[outpidx] = w[pidx]; + qtmp[outpidx] = q[pidx]; + if (TrackParticleID) + ParticleIDtmp[outpidx] = ParticleID[pidx]; + xavgtmp[outpidx] = xavg[pidx]; + yavgtmp[outpidx] = yavg[pidx]; + zavgtmp[outpidx] = zavg[pidx]; + } + // swap the tmp particle memory with the official particle memory + { + swap(xtmp,x); + swap(ytmp,y); + swap(ztmp,z); + swap(utmp,u); + swap(vtmp,v); + swap(wtmp,w); + swap(qtmp,q); + swap(ParticleIDtmp,ParticleID); + swap(xavgtmp,xavg); + swap(yavgtmp,yavg); + swap(zavgtmp,zavg); + } + + // check if the particles were sorted incorrectly + if(true) + { + for(int cx=0;cx= (nxc-1) ? nxc-1 : outcx+1; +// const int lowerindex = bucket_offset[cxlower][cylower][czlower]; +// const int upperoffset = bucket_offset[cxupper][cyupper][czupper]; +// const int upperindex = upperoffset + numpcls_in_bucket[outcx][outcy][outcz]; +// ... 
+// } +// } +// // (1) put fast particles that must be moved more than one +// // mesh cell at the end of the cell's list, and +// // (2) put slow particles in the correct location +// +// // count the number of particles that need to be moved +// // more than one mesh cell and allocate a special buffer for them. +// // (could change to count number of particles that need +// // to move more than N mesh cells). +// // +// int numpcls_long_move_thr = 0; +// #pragma omp for // nowait +// for (int i = 0; i < nop; i++) +// { +// const int cx = xidx[pidx]; +// const int cy = yidx[pidx]; +// const int cz = zidx[pidx]; +// +// const int cxlower = cx <= 0 ? 0 : cx-1; +// const int cxupper = cx >= (nxc-1) ? nxc-1 : cx+1; +// const int lowerindex = bucket_offset[cxlower][cylower][czlower]; +// const int upperoffset = bucket_offset[cxupper][cyupper][czupper]; +// const int upperindex = upperoffset + numpcls_in_bucket[cx][cy][cz]; +// if(i < lowerindex || i > upperindex) +// { +// numpcls_long_move_thr++; +// } +// } +// } +//} +//#endif + + diff --git a/utility/TimeTasks.cpp b/utility/TimeTasks.cpp index dd114ffc..13001f88 100644 --- a/utility/TimeTasks.cpp +++ b/utility/TimeTasks.cpp @@ -18,8 +18,11 @@ static const char *taskNames[] = // order must agree with Tasks in TimeTasks.h "particles", "last", "bfield", + "moment_pcl_sorting", "moment_accumulation", "moment_reduction", + "mover_pcl_sorting", + "mover_pcl_moving", "number_of_tasks" }; From a513bb1b47e7e35ddbf31c0555d7de1168b397d4 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 10 Jan 2014 22:14:21 +0100 Subject: [PATCH 072/118] committing new file forgotten in previous commit --- include/Parameters.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/Parameters.h diff --git a/include/Parameters.h b/include/Parameters.h new file mode 100644 index 00000000..ba4e980d --- /dev/null +++ b/include/Parameters.h @@ -0,0 +1,15 @@ +#ifndef _Parameters_h_ +#define _Parameters_h_ + +// namespace 
provides a more flexible, succinct singleton via "using Parameters" +// +namespace Parameters +{ + void init_parameters(); + + bool get_SORTING_PARTICLES(); + bool get_VECTORIZE_MOMENTS(); + bool get_VECTORIZE_MOVER(); + bool get_USING_XAVG(); +} +#endif From b3cc7254e71b7f9363af3b03a3604503a4eb0715 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 13 Jan 2014 10:36:37 +0100 Subject: [PATCH 073/118] corrected compile error on mic introduced two commits earlier --- fields/EMfields3D.cpp | 8 ++++---- particles/Particles3D.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 00240a67..a059fc77 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -569,7 +569,7 @@ void EMfields3D::sumMoments_vectorized( const int nop = pcls.getNOP(); #pragma omp master - timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); + { timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); } Moments10& speciesMoments10 = fetch_moments10Array(0); speciesMoments10.set_to_zero(); arr4_double moments = speciesMoments10.fetch_arr(); @@ -682,11 +682,11 @@ void EMfields3D::sumMoments_vectorized( } } #pragma omp master - timeTasks_end_task(TimeTasks::MOMENT_ACCUMULATION); + { timeTasks_end_task(TimeTasks::MOMENT_ACCUMULATION); } // reduction #pragma omp master - timeTasks_begin_task(TimeTasks::MOMENT_REDUCTION); + { timeTasks_begin_task(TimeTasks::MOMENT_REDUCTION); } { #pragma omp for collapse(2) for(int i=0;i Date: Mon, 13 Jan 2014 10:15:09 +0100 Subject: [PATCH 074/118] improved thread-awareness of debug and some asserts --- include/asserts.h | 29 +++--- include/debug.h | 12 ++- include/errors.h | 21 +++-- utility/asserts.cpp | 162 ++++++++++++++++++++++++++++--- utility/asserts.o | Bin 0 -> 7624 bytes utility/debug.cpp | 110 +++++++++++++++++++--- utility/errors.cpp | 98 +++++++++++++++---- utility/new/diagnostics.cpp | 183 ++++++++++++++++++++++++++++++++++++ 8 files changed, 546 insertions(+), 69 deletions(-) 
create mode 100644 utility/asserts.o create mode 100644 utility/new/diagnostics.cpp diff --git a/include/asserts.h b/include/asserts.h index e46b9c77..8a8a8718 100644 --- a/include/asserts.h +++ b/include/asserts.h @@ -38,16 +38,14 @@ #else // ifndef NDEBUG -// override system assert.h -// #define assert_fileLine(e, file, line) \ -// ((void)printf ("%s:%u: failed assertion `%s'\n", file, line, e), abort()) -// void eprintf_fileLine(const char *func, const char *file, int line_number, -// const char *format, ...); +void eprintf_fileLine(FILE * fptr, const char *type, const char *func, + const char *file, int line_number, const char *format, ...); #define dassert_fileLine(e, file, line, func) \ - (void)(printf("ERROR: file %s, line %d, function %s:\n\tfailed assertion: (%s)\n", file, line, func,e),abort()) -#define dassert_printf_fileLine(e, file, line, func, args...) \ - (void)(printf("ERROR: file %s, line %d, function %s:\n\tfailed assertion: (%s)\n\t", file, line, func,e), printf(args), printf("\n"), abort()) + (void)(eprintf_fileLine(stdout,"ERROR", func, file, line, \ + "\n\tfailed assertion: (%s)", e), abort()) +//#define dassert_printf_fileLine(e, file, line, func, args...) \ +// (void)(printf("ERROR: file %s, line %d, function %s:\n\tfailed assertion: (%s)\n\t", file, line, func,e), printf(args), printf("\n"), abort()) // comment out the next line if __builtin_expect causes problems #define USE_GCC_OPTIMIZATION @@ -59,16 +57,16 @@ #define dassert_(e) \ ((void) ((e) ? (void)0 : dassert_fileLine(#e, __FILE__, __LINE__, __func__))) -#define dassert_printf_(e, args...) \ - ((void) ((e) ? (void)0 : dassert_printf_fileLine(#e, __FILE__, __LINE__, __func__,##args))) +//#define dassert_printf_(e, args...) \ +// ((void) ((e) ? (void)0 : dassert_printf_fileLine(#e, __FILE__, __LINE__, __func__,##args))) #else // ifdef USE_GCC_OPTIMIZATION // optimized version of preceding // #define assert(e) \ // (__builtin_expect(!(e), 0) ? 
assert_fileLine (#e, __FILE__, __LINE__) : (void)0) #define dassert_(e) \ (__builtin_expect(!(e), 0) ? dassert_fileLine (#e, __FILE__, __LINE__, __func__) : (void)0) -#define dassert_printf(e, args...) \ - (__builtin_expect(!(e), 0) ? dassert_printf_fileLine (#e, __FILE__, __LINE__, __func__,##args) : (void)0) +//#define dassert_printf(e, args...) \ +// (__builtin_expect(!(e), 0) ? dassert_printf_fileLine (#e, __FILE__, __LINE__, __func__,##args) : (void)0) #endif // USE_GCC_OPTIMIZATION #if(MAX_ASSERT_LEVEL>=1) @@ -126,14 +124,19 @@ extern "C" { #else #define builtin_expect(a,b) __builtin_expect(a,b) #endif +// check whether two numbers are equal within machine precision #define assert_not_almost_eq(lhs,rhs) \ (fcmp(lhs, rhs, 1e-14) \ ? (void)0 \ : assert_error(__FILE__, __LINE__, __func__, " !=~= ", #lhs, #rhs, lhs, rhs)) #define assert_almost_eq(lhs,rhs) \ - (builtin_expect(fcmp(lhs, rhs, 1e-10),0) \ + (builtin_expect(fcmp(lhs, rhs, 1e-14),0) \ ? assert_error(__FILE__, __LINE__, __func__, " =~= ", #lhs, #rhs, lhs, rhs) \ : (void)0) +//#define assert_almost_eq(lhs,rhs) \ +// (builtin_expect(fcmp((lhs-rhs)/(fabs(lhs)+fabs(rhs)),1e-14),0) \ +// ? assert_error(__FILE__, __LINE__, __func__, " =~= ", #lhs, #rhs, lhs, rhs) \ +// : (void)0) #define assert_divides(lhs,rhs) \ (builtin_expect(rhs%lhs,0) \ ? assert_error(__FILE__, __LINE__, __func__, "(divides)", #lhs, #rhs, lhs, rhs) \ diff --git a/include/debug.h b/include/debug.h index 402d3819..fe1524c0 100644 --- a/include/debug.h +++ b/include/debug.h @@ -7,18 +7,20 @@ #include #include -#include "debug.h" +#include "errors.h" -void dfprintf_fileLine(FILE * fptr, const char *func, const char *file, int line_number, const char *format, ...); +void fprintf_fileLine(FILE * fptr, const char *type, const char *func, + const char *file, int line_number, const char *format, ...); -#define dprintf(args...) 
dfprintf_fileLine(stdout, __func__, __FILE__, __LINE__,## args) -#define dprint(var) dprintvar_fileLine(__func__,__FILE__,__LINE__,#var,var); +#define dprintf(args...) fprintf_fileLine(stdout, "DEBUG", __func__, __FILE__, __LINE__,## args) +#define dprint(var) printvar_fileLine(__func__, __FILE__,__LINE__,#var,var); #define dprint0(var) dprint(var) #define declare_dprintvar_fileLine(type) \ -void dprintvar_fileLine(const char*,const char*,int,const char*,type); +void printvar_fileLine(const char*,const char*,int,const char*,type); declare_dprintvar_fileLine(int); declare_dprintvar_fileLine(double); declare_dprintvar_fileLine(const char *); +declare_dprintvar_fileLine(const void *); #endif diff --git a/include/errors.h b/include/errors.h index ca80313f..ace6b5f2 100644 --- a/include/errors.h +++ b/include/errors.h @@ -1,16 +1,21 @@ #ifndef ipic_errors_H #define ipic_errors_H -void errmsg_printf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); -void eprintf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); -void Wprintf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); +//void errmsg_printf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); +//void eprintf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); +//void Wprintf_fileLine(const char *func, const char *file, int line_number, const char *format, ...); +void eprintf_fileLine(FILE * fptr, const char *type, + const char *func, const char *file, int line_number, + const char *format, ...); -#define errmsg_printf(args...) \ - errmsg_printf_fileLine(__func__, __FILE__, __LINE__, ## args); #define eprintf(args...) \ - errmsg_printf_fileLine(__func__, __FILE__, __LINE__, ## args); -#define Wprintf(args...) 
\ - Wprintf_fileLine(__func__, __FILE__, __LINE__, ## args); + eprintf_fileLine(stdout,"ERROR",__func__, __FILE__, __LINE__, ## args); +#define error_printf(args...) \ + eprintf_fileLine(stdout,"ERROR",__func__, __FILE__, __LINE__, ## args); +//#define eprintf(args...) \ +// eprintf_fileLine("ERROR",__func__, __FILE__, __LINE__, ## args); +#define warning_printf(args...) \ + eprintf_fileLine("WARNING",__func__, __FILE__, __LINE__, ## args); #define declare_invalid_value_error(t1) \ void invalid_value_error_fileLine(const char* file, int line, const char* func, \ const char* type, const char* expr, t1 val); diff --git a/utility/asserts.cpp b/utility/asserts.cpp index 312fe79b..5b58a1ae 100644 --- a/utility/asserts.cpp +++ b/utility/asserts.cpp @@ -1,23 +1,47 @@ +#ifndef NO_MPI + #include "MPIdata.h" // for get_rank +#endif +#include "ompdefs.h" // for omp_get_thread_num #include #include "asserts.h" void assert_error(const char *file, int line, const char *func, const char *op, const char *lhs_str, const char *rhs_str, double lhs, double rhs) { - fprintf(stdout, "ERROR in file %s, line %d, function %s" "\n\tassertion failed: %s %s %s, i.e., %24.16e %s %24.16e\n", file, line, func, lhs_str, op, rhs_str, lhs, op, rhs); + + eprintf_fileLine(stdout, "ERROR", func,file,line, + "\n\tassertion failed: %s %s %s, i.e., %24.16e %s %24.16e\n", lhs_str, op, rhs_str, lhs, op, rhs); abort(); } -#define implement_assert_errmsg(t1,t2) \ - void assert_error(const char* file, int line, const char* func, \ - const char* op, const char* lhs_str, const char* rhs_str, \ - t1 lhs, t2 rhs) \ - { \ - std::cerr<< "ERROR in file " << file << ", line " << line \ - << ", function " << func \ - <<"\n\tassertion failed: " << lhs_str << op << rhs_str \ - << ", i.e., " << lhs << op << rhs << std::endl; \ - abort(); \ - } +#ifndef NO_MPI + #ifdef _OPENMP + #define process_string \ + std::cerr << "(" << MPIdata::get_rank() << "." 
<< omp_get_thread_num() << ")"; + #else + #define process_string \ + std::cerr << "(" << MPIdata::get_rank() << ")"; + #endif +#else + #ifdef _OPENMP + #define process_string \ + std::cerr << "(." << omp_get_thread_num() << ")"; + #else + #define process_string + #endif +#endif + + #define implement_assert_errmsg(t1,t2) \ + void assert_error(const char* file, int line, const char* func, \ + const char* op, const char* lhs_str, const char* rhs_str, \ + t1 lhs, t2 rhs) \ + { \ + process_string \ + std::cerr << " ERROR in file " << file << ", line " << line \ + << ", function " << func \ + <<"\n\tassertion failed: " << lhs_str << op << rhs_str \ + << ", i.e., " << lhs << op << rhs << std::endl; \ + abort(); \ + } implement_assert_errmsg(size_t, size_t); implement_assert_errmsg(int, size_t); @@ -25,3 +49,117 @@ implement_assert_errmsg(size_t, int); implement_assert_errmsg(int, int); implement_assert_errmsg(long long, long long); implement_assert_errmsg(const char *, const char *); + +/* + fcmp + Copyright (c) 1998-2000 Theodore C. Belding + University of Michigan Center for the Study of Complex Systems + + + + This file is part of the fcmp distribution. fcmp is free software; + you can redistribute and modify it under the terms of the GNU Library + General Public License (LGPL), version 2 or later. This software + comes with absolutely no warranty. See the file COPYING for details + and terms of copying. + + File: fcmp.h + + Description: + + Knuth's floating point comparison operators, from: + Knuth, D. E. (1998). The Art of Computer Programming. + Volume 2: Seminumerical Algorithms. 3rd ed. Addison-Wesley. + Section 4.2.2, p. 233. ISBN 0-201-89684-2. + + Input parameters: + x1, x2: numbers to be compared + epsilon: determines tolerance + + epsilon should be carefully chosen based on the machine's precision, + the observed magnitude of error, the desired precision, and the + magnitude of the numbers to be compared. See the fcmp README file for + more information. 
+ + This routine may be used for both single-precision (float) and + double-precision (double) floating-point numbers. + + Returns: + -1 if x1 < x2 + 0 if x1 == x2 + 1 if x1 > x2 +*/ + +/* + fcmp + Copyright (c) 1998-2000 Theodore C. Belding + University of Michigan Center for the Study of Complex Systems + + + + This file is part of the fcmp distribution. fcmp is free software; + you can redistribute and modify it under the terms of the GNU Library + General Public License (LGPL), version 2 or later. This software + comes with absolutely no warranty. See the file COPYING for details + and terms of copying. + + File: fcmp.c + + Description: see fcmp.h and README files. +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +int fcmp(double x1, double x2, double epsilon) +{ + double diff = x1-x2; + if(diff>epsilon) return 1; + if(diff<-epsilon) return -1; + return 0; + + // the code below was failing for some reason. -eaj + + int exponent; + double delta; + double difference; + + /* Get exponent(max(fabs(x1), fabs(x2))) and store it in exponent. */ + + /* If neither x1 nor x2 is 0, */ + /* this is equivalent to max(exponent(x1), exponent(x2)). */ + + /* If either x1 or x2 is 0, its exponent returned by frexp would be 0, */ + /* which is much larger than the exponents of numbers close to 0 in */ + /* magnitude. But the exponent of 0 should be less than any number */ + /* whose magnitude is greater than 0. */ + + /* So we only want to set exponent to 0 if both x1 and */ + /* x2 are 0. Hence, the following works for all x1 and x2. */ + + frexp(fabs(x1) > fabs(x2) ? x1 : x2, &exponent); + + /* Do the comparison. */ + + /* delta = epsilon * pow(2, exponent) */ + + /* Form a neighborhood around x2 of size delta in either direction. */ + /* If x1 is within this delta neighborhood of x2, x1 == x2. */ + /* Otherwise x1 > x2 or x1 < x2, depending on which side of */ + /* the neighborhood x1 is on. 
*/ + + delta = ldexp(epsilon, exponent); + + difference = x1 - x2; + + if (difference > delta) + return 1; /* x1 > x2 */ + else if (difference < -delta) + return -1; /* x1 < x2 */ + else /* -delta <= difference <= delta */ + return 0; /* x1 == x2 */ +} + diff --git a/utility/asserts.o b/utility/asserts.o new file mode 100644 index 0000000000000000000000000000000000000000..27b794781d3db901fba768c5bda8c7f223843b9b GIT binary patch literal 7624 zcmdU!Z)_aZ5y0QsAvS4xb|?jcC_a!K1(a;ge@H_V=$*+WcVH*M=OhXeY}a>t&6fKU zZa2l2Qj*9hXa%J~0*QPGpa}6#sNzFEMEq+5DER&b~wf)dGNKmDvz|7vv zdG6uo5c(-kzI*e2J2P+I?Ce|b-rmf}hPGI&T~UjvTUCq)qVK$Vc94q1lXMUo!C54Ez{yjHerVY%|ffQlAq4m&Q9FXLH4~ z1UULRMLA_ZL&RU9Q_p<6!OuOUe~?OU3&rz{L7xt*ZGS-NulN;7Z=&=ner2cM>HHfC zT|MjZ>p_%NdOW=~y)C^xzja&qc{cp-tkGzAax^@i3;&c0KaBik!xP!?FDyomc@X}C zr4~FAo-yz*n2*ewj)Y$^ZSj(QIOt9B5uuMf%Y4=%BBhIHeTL=Dv%0Px*z|h#UF7F) z-UVJn>oc6c`O34a=OS9)|8@Os8x7xvoo6N;ewGVQWyAkO7b@PRRQBOpu!6JU;@**P zq_=Z4{3r{3_xkYt##KH2P9hR$!`g@1nX$3WV+pUCD0yW!q11{**{g!EB2n61Ed*Yz znox@_b$s8gM}v|Bdd0P%iMpQUS}P*C)lZ*Ke)@{SQJ+7Tr#?Rs{js;cfg!7ZwHv{P zh;c=(kB{XxZM6EM$jW{$AN2PZ zb~<%Cs5@TZ=L(t37_dPI&@0$bymi;9YEZTI_+ciN@8 zQ*ptQTXwiOBRQ(j25xH6br?N?%|6566HTMx4%fj-LdN59_*U|UkBqa8-Ui`e z$rlml{BWIg8T6M5F4udV=7;NKnb4Q>_8a)knjfx{TMha-10OeV+rWQ8ob$lPg~yog5bWvQCZ&F6-o&@FVX##|4*la$5M2aeiXpf7kqQo&3|F ze@<|jpV$H@Kxmw2u9Jnt_4-_5;7c_>y$!eOT7&)#20moqKQr)+#5sPhlM#dd4#DMl zFA6U6f1lt|e~P$XCl3gHStrwi%Q`tC{K$9rzZ3dDfn4&wcS3Od-+_ z$^Yk58c))ApT_yE_#ZXSZ^iki8urg`zf*LdWzKKE_i3EpawjJ= z*MCXMQb04>guG**FKi#c2j?HafoRKVqO4#jvO7AVYCj}@F8f_%|8vbgL1VVZ-?ikE zC99v(7pmE@9^T>E7c=bHex-yU>$=Ct9=ewHKbrp;&4Aa>%2-GGxyNINaf^U*(Ct4l*x$1hl%c)Q=%T#wZi|TL F{|1634 #include #include @@ -7,25 +11,32 @@ /** implementation of declarations in errors.h **/ -void errmsg_printf_fileLine(const char *func, const char *file, int line_number, - const char *format, ...) 
-{ - FILE* fptr = stdout; - fflush(fptr); - va_list args; - va_start(args, format); - fprintf(fptr, "ERROR in function %s, file %s, line %d: \n\t", - func, file, line_number); - /* print out remainder of message */ - vfprintf(fptr, format, args); - va_end(args); - // append terminating newline so user does not have to do it - fprintf(fptr, "\n"); - fflush(fptr); +void fprintf_fileLine(FILE * fptr, const char *type, const char *func, const char *file, int line_number, const char *format, ...); - abort(); -} +// This is not thread-safe. +//void errmsg_printf_fileLine(const char *func, const char *file, int line_number, +// const char *format, ...) +//{ +// FILE* fptr = stdout; +// fflush(fptr); +// va_list args; +// va_start(args, format); +// fprintf(fptr, "ERROR in function %s, file %s, line %d: \n\t", +// func, file, line_number); +// /* print out remainder of message */ +// vfprintf(fptr, format, args); +// va_end(args); +// // append terminating newline so user does not have to do it +// fprintf(fptr, "\n"); +// fflush(fptr); +// +// abort(); +//} +// This needs to be fixed to be thread-safe like +// eprintf_fileLine() below. Write the message to a string and +// then print it out as an atomic operation. +// #include using namespace std; #define implement_invalid_value_error(t1) \ @@ -42,3 +53,56 @@ implement_invalid_value_error(double); implement_invalid_value_error(int); implement_invalid_value_error(const char*); +/*! a more verbose version of fprintf_fileLine for use in + * warnings and error messages */ +void eprintf_fileLine(FILE * fptr, const char *type, + const char *func, const char *file, int line_number, + const char *format, ...) +{ + // writing directly to fptr would avoid limiting the length + // of the output string, but by first writing to a string + // we achieve thread safety. + // + // write the message to a string. 
+ // + const int maxchars = 1024; + char error_msg[maxchars+2]; + // identify the process and thread + char process_thread_str[50]; + #ifndef NO_MPI + #ifdef _OPENMP + snprintf(process_thread_str, 50, ", process %d, thread %d", + MPIdata::get_rank(), omp_get_thread_num()); + #else + snprintf(process_thread_str, 50, ", process %d", + MPIdata::get_rank()); + #endif + #else + #ifdef _OPENMP + snprintf(process_thread_str, 50, ", thread %d", + omp_get_thread_num()); + #else + sprintf(process_thread_str, ""); + #endif + #endif + char *sptr = error_msg; + int chars_so_far=0; + va_list args; + va_start(args, format); + chars_so_far = snprintf(sptr, maxchars, + "%s in method %s(), file %s, line %d%s:\n\t", + type, + func, file, // my_basename(file), + line_number, process_thread_str); + /* print out remainder of message */ + chars_so_far += vsnprintf(sptr+chars_so_far, maxchars-chars_so_far, format, args); + va_end(args); + sprintf(sptr+chars_so_far, "\n"); + + // print the message + fflush(fptr); + // #pragma omp critical // need this? 
+ { fprintf(fptr,error_msg); } + fflush(fptr); + abort(); +} diff --git a/utility/new/diagnostics.cpp b/utility/new/diagnostics.cpp new file mode 100644 index 00000000..3b8d7372 --- /dev/null +++ b/utility/new/diagnostics.cpp @@ -0,0 +1,183 @@ + +/** implementation of declarations in utility/debug.h **/ + +#include +#include "TimeTasks.h" +#include "debug.h" +#include "asserts.h" +#include "../mpidata/MPIdata.h" // for rank + +#define implement_dprintvar_fileLine(code,type) \ + void dprintvar_fileLine(const char* func, const char* file, int line, \ + const char* name, type val) \ + { \ + dfprintf_fileLine(stderr,func,file,line, \ + code " == %s",val,name); \ + } +implement_dprintvar_fileLine("%s",const char*); +implement_dprintvar_fileLine("%d",int); +//implement_dprintvar_fileLine("%24.16e",double); +implement_dprintvar_fileLine("%f",double); + +void dfprintf_fileLine(FILE* fptr, const char *func, const char *file, int line_number, + const char *format, ...) +{ + fflush(fptr); + va_list args; + va_start(args, format); + fprintf(fptr, "(%d) DEBUG %s(), %s:%d: ", + get_rank(), func, + file, // my_basename(file), + line_number); + /* print out remainder of message */ + vfprintf(fptr, format, args); + va_end(args); + fprintf(fptr,"\n"); + fflush(fptr); +} + +int get_rank() { return mpi->rank; } + +/** implementation of declarations in utility/assert.h **/ + +// so that we can print doubles to desired precision +// +void assert_error(const char* file, int line, const char* func, + const char* op, const char* lhs_str, const char* rhs_str, + double lhs, double rhs) +{ + fprintf(stderr,"ERROR in file %s, line %d, function %s" + "\n\tassertion failed: %s %s %s, i.e., %24.16e %s %24.16e\n", + file, line, func, lhs_str, op, rhs_str, lhs, op, rhs); + abort(); +} + +#define implement_assert_errmsg(t1,t2) \ + void assert_error(const char* file, int line, const char* func, \ + const char* op, const char* lhs_str, const char* rhs_str, \ + t1 lhs, t2 rhs) \ + { \ + std::cerr<< 
"ERROR in file " << file << ", line " << line \ + << ", function " << func \ + <<"\n\tassertion failed: " << lhs_str << op << rhs_str \ + << ", i.e., " << lhs << op << rhs << endl; \ + abort(); \ + } + +implement_assert_errmsg(int,int); +implement_assert_errmsg(const char*,const char*); +implement_assert_errmsg(const string&,const string&); + +/** implementation of declarations in utility/TimeTasks.h **/ + +void TimeTasks::resetCycle() +{ + for(int e=0;e Date: Mon, 13 Jan 2014 14:29:43 +0100 Subject: [PATCH 075/118] fixed bug in sumMoments_vectorized() --- fields/EMfields3D.cpp | 174 ++++++++++++++++++++++++++-------- fields/Moments.cpp | 17 ++-- include/Alloc.h | 11 ++- include/EMfields3D.h | 4 +- main/Parameters.cpp | 10 +- main/iPic3Dlib.cpp | 27 ++++-- particles/Particles3D.cpp | 30 ++++-- particles/Particles3Dcomm.cpp | 6 +- 8 files changed, 208 insertions(+), 71 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index a059fc77..b7bd9b32 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -192,7 +192,8 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : injFieldsFront = new injInfoFields(nxn, nyn, nzn); injFieldsRear = new injInfoFields(nxn, nyn, nzn); - sizeMomentsArray = omp_get_max_threads(); + // EDIT: delete "+ns" before checking in + sizeMomentsArray = omp_get_max_threads()+ns; moments10Array = new Moments10*[sizeMomentsArray]; for(int i=0;igetYstart(); const double zstart = grid->getZstart(); #pragma omp parallel + { for (int species_idx = 0; species_idx < ns; species_idx++) { const Particles3Dcomm& pcls = part[species_idx]; @@ -570,14 +582,61 @@ void EMfields3D::sumMoments_vectorized( const int nop = pcls.getNOP(); #pragma omp master { timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); } - Moments10& speciesMoments10 = fetch_moments10Array(0); - speciesMoments10.set_to_zero(); + // EDIT change arr_idx to 0 before checking this in!! 
+ const int arr_idx = omp_get_max_threads()+is; + Moments10& speciesMoments10 = fetch_moments10Array(arr_idx); arr4_double moments = speciesMoments10.fetch_arr(); - #pragma omp for collapse(2) // schedule(static) - for(int cx=0;cxgetCartesian_rank() == 0) diff --git a/fields/Moments.cpp b/fields/Moments.cpp index c1518b4d..b7b09b85 100644 --- a/fields/Moments.cpp +++ b/fields/Moments.cpp @@ -3,13 +3,14 @@ void Moments10::set_to_zero() { - #pragma omp parallel for collapse(4) - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - for (register int m = 0; m < 10; m++) - { - arr[i][j][k][m] = 0.0; - } + arr.setall(0); + //#pragma omp parallel for collapse(4) + //for (register int i = 0; i < nx; i++) + //for (register int j = 0; j < ny; j++) + //for (register int k = 0; k < nz; k++) + //for (register int m = 0; m < 10; m++) + //{ + // arr[i][j][k][m] = 0.0; + //} } diff --git a/include/Alloc.h b/include/Alloc.h index e8456785..63100dc3 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -212,8 +212,10 @@ namespace iPic3D base_arr(size_t s) : size(s), arr(AlignedAlloc(type, s)) {} base_arr(type* in, size_t s) : size(s), arr(in) {} ~base_arr(){} + int get_size() { return size; } void free() { AlignedFree(arr); } void setall(type val){ + // #pragma omp for for(size_t i=0;i::set(n3,n2,n1, value); } void setall(type val){ + // #pragma omp for for(size_t i=0;i operator[](size_t n4)const{ check_bounds(n4, S4); @@ -615,9 +619,8 @@ namespace iPic3D protected: void setall(type val) { - #pragma omp for - for(int i=0;i::S1; using const_array_ref4::arr4; using const_array_ref4::getidx; + public: // this did not work unless I made the using statment public. 
+ using const_array_ref4::get_size; public: ~array_ref4(){} array_ref4(size_t s4, size_t s3, size_t s2, size_t s1) : diff --git a/include/EMfields3D.h b/include/EMfields3D.h index ab0cc1df..c55d68ce 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -122,6 +122,8 @@ class EMfields3D // :public Field /*! sum moments (interp_P2G) versions */ void sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); void sumMoments_vectorized(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); + void checkMoments(const Particles3Dcomm* part); + void checkMoment(const Particles3Dcomm* part); void sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); /*! add accumulated moments to the moments for a given species */ //void addToSpeciesMoments(const TenMoments & in, int is); @@ -264,7 +266,7 @@ class EMfields3D // :public Field /*! fetch array for summing moments of thread i */ Moments10& fetch_moments10Array(int i){ assert_le(0,i); - assert_le(i,sizeMomentsArray); + assert_lt(i,sizeMomentsArray); return *(moments10Array[i]); } diff --git a/main/Parameters.cpp b/main/Parameters.cpp index 740586de..b94b239d 100644 --- a/main/Parameters.cpp +++ b/main/Parameters.cpp @@ -9,8 +9,10 @@ void Parameters::init_parameters() SORTING_PARTICLES = get_VECTORIZE_MOMENTS() || get_VECTORIZE_MOVER(); } -bool Parameters::get_SORTING_PARTICLES() { return SORTING_PARTICLES; } +//bool Parameters::get_SORTING_PARTICLES() { return SORTING_PARTICLES; } +bool Parameters::get_SORTING_PARTICLES() { return true; } bool Parameters::get_VECTORIZE_MOMENTS() { return false; } -bool Parameters::get_VECTORIZE_MOVER() { return false; } -// this will also return true if we communicate particles per iteration -bool Parameters::get_USING_XAVG() { return get_VECTORIZE_MOVER(); } +bool Parameters::get_VECTORIZE_MOVER() { return true; } +// this must also return true if we communicate particles per iteration +//bool Parameters::get_USING_XAVG() { 
return get_VECTORIZE_MOVER(); } +bool Parameters::get_USING_XAVG() { return get_SORTING_PARTICLES(); } diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 891a16af..3462c132 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -4,6 +4,9 @@ #include "ipicdefs.h" #include "debug.h" #include "Parameters.h" +#include "ompdefs.h" + +#include "Moments.h" // for debugging using namespace iPic3D; MPIdata* iPic3D::c_Solver::mpi=0; @@ -178,17 +181,19 @@ void c_Solver::CalculateMoments() { timeTasks_set_main_task(TimeTasks::MOMENTS); EMf->updateInfoFields(grid,vct,col); - EMf->setZeroDensities(); // set to zero the densities + EMf->setZeroDensities(); if(Parameters::get_SORTING_PARTICLES()) { // sort particles - #pragma omp master - timeTasks_begin_task(TimeTasks::MOMENT_PCL_SORTING); - for(int species_idx=0; species_idxsumMoments(part, grid, vct); } + // do the moments calculated by the old and new code agree? + //EMf->setZeroDensities(); + //EMf->sumMoments_vectorized(part, grid, vct); + //EMf->setZeroDensities(); + //EMf->sumMoments(part, grid, vct); + //EMf->checkMoments(part); //for (int i = 0; i < ns; i++) //{ // EMf->sumMomentsOld(part[i], grid, vct); @@ -245,6 +256,7 @@ bool c_Solver::ParticlesMover() { // Should change this to add background field EMf->set_fieldForPcls(); #pragma omp parallel + { for (int i = 0; i < ns; i++) // move each species { // #pragma omp task inout(part[i]) in(grid) target_device(booster) @@ -256,6 +268,7 @@ bool c_Solver::ParticlesMover() { else part[i].mover_PC(grid, vct, EMf); } + } for (int i = 0; i < ns; i++) // move each species { mem_avail = part[i].communicate_particles(vct); diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 9a88d51c..fde7536f 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -319,7 +319,7 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); #pragma omp master - 
timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); + { timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); } const pfloat dto2 = .5 * dt, qdto2mc = qom * dto2 / c; #pragma omp for schedule(static) // why does single precision make no difference in execution speed? @@ -453,7 +453,7 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { w[pidx] = 2.0 * wavg - worig; } // END OF ALL THE PARTICLES #pragma omp master - timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); + { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } } /** mover with a Predictor-Corrector scheme */ @@ -485,10 +485,12 @@ void Particles3D::mover_PC_vectorized( if(niter>1) // on first iteration already was sorted to sum moments { #pragma omp master - timeTasks_begin_task(TimeTasks::MOVER_PCL_SORTING); - sort_particles_serial(xavg, yavg, zavg, grid,vct); - #pragma omp master - timeTasks_end_task(TimeTasks::MOVER_PCL_SORTING); + { + timeTasks_begin_task(TimeTasks::MOVER_PCL_SORTING); + sort_particles_serial(xavg, yavg, zavg, grid,vct); + timeTasks_end_task(TimeTasks::MOVER_PCL_SORTING); + } + #pragma omp barrier } #pragma omp master @@ -534,15 +536,23 @@ void Particles3D::mover_PC_vectorized( for(int pidx=bucket_offset; pidxsetall(0); // iterate through particles and count where they will go @@ -1018,6 +1017,7 @@ void Particles3Dcomm::sort_particles_serial( (*bucket_offset)[cx][cy][cz] = accpcls; accpcls += (*numpcls_in_bucket)[cx][cy][cz]; } + assert_eq(accpcls,nop); numpcls_in_bucket_now->setall(0); // put the particles where they are supposed to go @@ -1035,6 +1035,10 @@ void Particles3Dcomm::sort_particles_serial( // compute where the data should go const int numpcls_now = (*numpcls_in_bucket_now)[cx][cy][cz]++; const int outpidx = (*bucket_offset)[cx][cy][cz] + numpcls_now; + assert_lt(outpidx, nop); + assert_ge(outpidx, 0); + assert_lt(pidx, nop); + assert_ge(pidx, 0); // copy particle data to new location // From 4c36c2dcd0ba16d24ee02815f4dc58c9992cde9d Mon Sep 17 
00:00:00 2001 From: eajohnson Date: Tue, 14 Jan 2014 23:34:26 +0100 Subject: [PATCH 076/118] cleanup after previous commit --- fields/EMfields3D.cpp | 142 +++++++++++------------------------------- include/EMfields3D.h | 6 +- main/iPic3Dlib.cpp | 9 +-- 3 files changed, 44 insertions(+), 113 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index b7bd9b32..534743f4 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -4,6 +4,7 @@ #include "Particles3Dcomm.h" #include "TimeTasks.h" #include "Moments.h" +#include "Parameters.h" #include "ompdefs.h" #include "debug.h" @@ -192,8 +193,17 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : injFieldsFront = new injInfoFields(nxn, nyn, nzn); injFieldsRear = new injInfoFields(nxn, nyn, nzn); - // EDIT: delete "+ns" before checking in - sizeMomentsArray = omp_get_max_threads()+ns; + if(Parameters::get_VECTORIZE_MOMENTS()) + { + // In this case particles are sorted + // and there is no need for each thread + // to sum moments in a separate array. + sizeMomentsArray = 1; + } + else + { + sizeMomentsArray = omp_get_max_threads(); + } moments10Array = new Moments10*[sizeMomentsArray]; for(int i=0;igetCartesian_rank() == 0) @@ -1765,30 +1709,8 @@ void EMfields3D::communicateGhostP2G(int ns, int bcFaceXright, int bcFaceXleft, communicateNode_P(nxn, nyn, nzn, pZZsn, ns, vct); } -/* add moments (e.g. 
from an OpenMP thread) to the accumulated moments */ -//void EMfields3D::addToSpeciesMoments(const TenMoments & in, int is) { -// assert_eq(in.get_nx(), nxn); -// assert_eq(in.get_ny(), nyn); -// assert_eq(in.get_nz(), nzn); -// for (register int i = 0; i < nxn; i++) { -// for (register int j = 0; j < nyn; j++) -// for (register int k = 0; k < nzn; k++) { -// rhons[is][i][j][k] += invVOL*in.get_rho(i, j, k); -// Jxs [is][i][j][k] += invVOL*in.get_Jx(i, j, k); -// Jys [is][i][j][k] += invVOL*in.get_Jy(i, j, k); -// Jzs [is][i][j][k] += invVOL*in.get_Jz(i, j, k); -// pXXsn[is][i][j][k] += invVOL*in.get_pXX(i, j, k); -// pXYsn[is][i][j][k] += invVOL*in.get_pXY(i, j, k); -// pXZsn[is][i][j][k] += invVOL*in.get_pXZ(i, j, k); -// pYYsn[is][i][j][k] += invVOL*in.get_pYY(i, j, k); -// pYZsn[is][i][j][k] += invVOL*in.get_pYZ(i, j, k); -// pZZsn[is][i][j][k] += invVOL*in.get_pZZ(i, j, k); -// } -// } -//} - -/*! set to 0 all the densities fields */ -void EMfields3D::setZeroDensities() { +void EMfields3D::setZeroDerivedMoments() +{ for (register int i = 0; i < nxn; i++) for (register int j = 0; j < nyn; j++) for (register int k = 0; k < nzn; k++) { @@ -1806,6 +1728,12 @@ void EMfields3D::setZeroDensities() { rhoc[i][j][k] = 0.0; rhoh[i][j][k] = 0.0; } +} + +void EMfields3D::setZeroPrimaryMoments() { + + // set primary moments to zero + // for (register int kk = 0; kk < ns; kk++) for (register int i = 0; i < nxn; i++) for (register int j = 0; j < nyn; j++) @@ -1823,6 +1751,12 @@ void EMfields3D::setZeroDensities() { } } +/*! 
set to 0 all the densities fields */ +void EMfields3D::setZeroDensities() { + setZeroDerivedMoments(); + setZeroPrimaryMoments(); +} + /*!SPECIES: Sum the charge density of different species on NODES */ void EMfields3D::sumOverSpecies(VirtualTopology3D * vct) { for (int is = 0; is < ns; is++) diff --git a/include/EMfields3D.h b/include/EMfields3D.h index c55d68ce..5d2f4780 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -104,6 +104,10 @@ class EMfields3D // :public Field void interpDensitiesN2C(VirtualTopology3D * vct, Grid * grid); /*! set to 0 all the densities fields */ void setZeroDensities(); + /*! set to 0 primary moments */ + void setZeroPrimaryMoments(); + /*! set to 0 all densities derived from primary moments */ + void setZeroDerivedMoments(); /*! Sum rhon over species */ void sumOverSpecies(VirtualTopology3D * vct); /*! Sum current over different species */ @@ -122,8 +126,6 @@ class EMfields3D // :public Field /*! sum moments (interp_P2G) versions */ void sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); void sumMoments_vectorized(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); - void checkMoments(const Particles3Dcomm* part); - void checkMoment(const Particles3Dcomm* part); void sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); /*! 
add accumulated moments to the moments for a given species */ //void addToSpeciesMoments(const TenMoments & in, int is); diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 3462c132..4d8c532d 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -181,7 +181,6 @@ void c_Solver::CalculateMoments() { timeTasks_set_main_task(TimeTasks::MOMENTS); EMf->updateInfoFields(grid,vct,col); - EMf->setZeroDensities(); if(Parameters::get_SORTING_PARTICLES()) { @@ -204,18 +203,14 @@ void c_Solver::CalculateMoments() { } else { + EMf->setZeroPrimaryMoments(); EMf->sumMoments(part, grid, vct); } - // do the moments calculated by the old and new code agree? - //EMf->setZeroDensities(); - //EMf->sumMoments_vectorized(part, grid, vct); - //EMf->setZeroDensities(); - //EMf->sumMoments(part, grid, vct); - //EMf->checkMoments(part); //for (int i = 0; i < ns; i++) //{ // EMf->sumMomentsOld(part[i], grid, vct); //} + EMf->setZeroDerivedMoments(); EMf->sumOverSpecies(vct); // sum all over the species // Fill with constant charge the planet From bef8049c5bbd52c6917a8f2e3c50b4939bb29b12 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 14 Jan 2014 23:36:35 +0100 Subject: [PATCH 077/118] created mechanism to restrict debug to master thread of main process --- include/parallel.h | 18 ++++++++++++++++++ utility/debug.cpp | 5 ++++- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 include/parallel.h diff --git a/include/parallel.h b/include/parallel.h new file mode 100644 index 00000000..4c6f9ece --- /dev/null +++ b/include/parallel.h @@ -0,0 +1,18 @@ +#ifndef _parallel_h_ +#define _parallel_h_ +/********************************* + * General header for parallelism + * (MPI, OpenMP, and SIMD) + *********************************/ + +#include "MPIdata.h" +#include "ompdefs.h" + +/*! 
used to restrict output to a single thread of a single process */ +//inline bool is_main_master_thread() +inline bool is_output_thread() +{ + return !(MPIdata::get_rank() || omp_get_thread_num()); +} + +#endif diff --git a/utility/debug.cpp b/utility/debug.cpp index bd4daa9f..44d79626 100644 --- a/utility/debug.cpp +++ b/utility/debug.cpp @@ -4,6 +4,7 @@ #endif #include "ompdefs.h" // for omp_get_thread_num #include "debug.h" +#include "parallel.h" // temporary #define implement_dprintvar_fileLine(code,type) \ void printvar_fileLine(const char* func, const char* file, int line, \ @@ -14,7 +15,7 @@ implement_dprintvar_fileLine("%s", const char *); implement_dprintvar_fileLine("%d", int); -implement_dprintvar_fileLine("%g", double); +implement_dprintvar_fileLine("%e", double); implement_dprintvar_fileLine("%p", const void *); // void dfprintf_fileLine(FILE * fptr, const char *func, const char *file, int line_number, const char *format, ...) @@ -69,6 +70,8 @@ void fprintf_fileLine(FILE * fptr, const char *type, const char *func, const char *file, int line_number, const char *format, ...) { + //if(!is_output_thread()) return; // temporary + // writing directly to fptr would avoid limiting the length // of the output string, but by first writing to a string // we achieve thread safety. 
From a351d13e2b856ba86405c02731f0613a4436285c Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 14 Jan 2014 23:38:33 +0100 Subject: [PATCH 078/118] implemented assert_almost_eq() with tolerance parameter like with fcmp --- include/asserts.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/asserts.h b/include/asserts.h index 8a8a8718..7bac3452 100644 --- a/include/asserts.h +++ b/include/asserts.h @@ -125,12 +125,12 @@ extern "C" { #define builtin_expect(a,b) __builtin_expect(a,b) #endif // check whether two numbers are equal within machine precision -#define assert_not_almost_eq(lhs,rhs) \ - (fcmp(lhs, rhs, 1e-14) \ +#define assert_not_almost_eq(lhs,rhs,tol) \ + (fcmp(lhs, rhs, tol) \ ? (void)0 \ : assert_error(__FILE__, __LINE__, __func__, " !=~= ", #lhs, #rhs, lhs, rhs)) -#define assert_almost_eq(lhs,rhs) \ - (builtin_expect(fcmp(lhs, rhs, 1e-14),0) \ +#define assert_almost_eq(lhs,rhs,tol) \ + (builtin_expect(fcmp(lhs, rhs, tol),0) \ ? assert_error(__FILE__, __LINE__, __func__, " =~= ", #lhs, #rhs, lhs, rhs) \ : (void)0) //#define assert_almost_eq(lhs,rhs) \ From eda80987da50efc079a8bbd2033084f36f974289 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Tue, 14 Jan 2014 23:44:45 +0100 Subject: [PATCH 079/118] setting get_VECTORIZE_MOMENTS to true --- main/Parameters.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/Parameters.cpp b/main/Parameters.cpp index b94b239d..77abb87e 100644 --- a/main/Parameters.cpp +++ b/main/Parameters.cpp @@ -11,7 +11,7 @@ void Parameters::init_parameters() //bool Parameters::get_SORTING_PARTICLES() { return SORTING_PARTICLES; } bool Parameters::get_SORTING_PARTICLES() { return true; } -bool Parameters::get_VECTORIZE_MOMENTS() { return false; } +bool Parameters::get_VECTORIZE_MOMENTS() { return true; } bool Parameters::get_VECTORIZE_MOVER() { return true; } // this must also return true if we communicate particles per iteration //bool Parameters::get_USING_XAVG() { return 
get_VECTORIZE_MOVER(); } From 7b35413de496f5f6f5d038009e3b3e25dac8e4c3 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 15 Jan 2014 11:21:15 +0100 Subject: [PATCH 080/118] Restored iteration order over momentsArray[c][m] so m is inner. --- fields/EMfields3D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 534743f4..ed247f17 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -712,8 +712,8 @@ void EMfields3D::sumMoments_vectorized( // add particle to moments { // which is the superior order for the following loop? - for(int m=0; m<10; m++) for(int c=0; c<8; c++) + for(int m=0; m<10; m++) { momentsArray[c][m] += velmoments[m]*weights[c]; // When simd above is uncommented, From 1a4e10bbb7acb508d41a8afe5d0ee561d0a44ac5 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 15 Jan 2014 13:52:43 +0100 Subject: [PATCH 081/118] fixed compile errors from merge --- fields/EMfields3D.cpp | 23 +++++++++++------------ include/EMfields3D.h | 22 +++++++++++----------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index f07c4c74..d3d21653 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -120,7 +120,7 @@ EMfields3D::EMfields3D(Collective * col, Grid * grid) : vectY (nxn, nyn, nzn), vectZ (nxn, nyn, nzn), divC (nxc, nyc, nzc), - //arr (double,nxc-2,nyc-2,nzc-2); + arr (nxc-2,nyc-2,nzc-2), // B_ext and J_ext should not be allocated unless used. Bx_ext(nxn,nyn,nzn), By_ext(nxn,nyn,nzn), @@ -3517,8 +3517,7 @@ void EMfields3D::BoundaryConditionsE(arr3_double vectorX, arr3_double vectorY, a } /*! get Electric Field component X array cell without the ghost cells */ -void EMfields3D::getExc(Grid3DCU *grid) { - +arr3_double EMfields3D::getExc(Grid3DCU *grid) { array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ex); @@ -3529,7 +3528,7 @@ void EMfields3D::getExc(Grid3DCU *grid) { return arr; } /*! 
get Electric Field component Y array cell without the ghost cells */ -double ***EMfields3D::getEyc(Grid3DCU *grid) { +arr3_double EMfields3D::getEyc(Grid3DCU *grid) { array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ey); @@ -3540,7 +3539,7 @@ double ***EMfields3D::getEyc(Grid3DCU *grid) { return arr; } /*! get Electric Field component Z array cell without the ghost cells */ -double ***EMfields3D::getEzc(Grid3DCU *grid) { +arr3_double EMfields3D::getEzc(Grid3DCU *grid) { array3_double tmp(nxc,nyc,nzc); grid->interpN2C(tmp, Ez); @@ -3551,7 +3550,7 @@ double ***EMfields3D::getEzc(Grid3DCU *grid) { return arr; } /*! get Magnetic Field component X array cell without the ghost cells */ -double ***EMfields3D::getBxc() { +arr3_double EMfields3D::getBxc() { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) @@ -3559,7 +3558,7 @@ double ***EMfields3D::getBxc() { return arr; } /*! get Magnetic Field component Y array cell without the ghost cells */ -double ***EMfields3D::getByc() { +arr3_double EMfields3D::getByc() { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) @@ -3567,7 +3566,7 @@ double ***EMfields3D::getByc() { return arr; } /*! get Magnetic Field component Z array cell without the ghost cells */ -double ***EMfields3D::getBzc() { +arr3_double EMfields3D::getBzc() { for (int i = 1; i < nxc-1; i++) for (int j = 1; j < nyc-1; j++) for (int k = 1; k < nzc-1; k++) @@ -3575,7 +3574,7 @@ double ***EMfields3D::getBzc() { return arr; } /*! get species density component X array cell without the ghost cells */ -double ***EMfields3D::getRHOcs(Grid3DCU *grid, int is) { +arr3_double EMfields3D::getRHOcs(Grid3DCU *grid, int is) { array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, rhons); @@ -3587,7 +3586,7 @@ double ***EMfields3D::getRHOcs(Grid3DCU *grid, int is) { } /*! 
get Magnetic Field component X array species is cell without the ghost cells */ -double ***EMfields3D::getJxsc(Grid3DCU *grid, int is) { +arr3_double EMfields3D::getJxsc(Grid3DCU *grid, int is) { array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jxs); @@ -3599,7 +3598,7 @@ double ***EMfields3D::getJxsc(Grid3DCU *grid, int is) { } /*! get current component Y array species is cell without the ghost cells */ -double ***EMfields3D::getJysc(Grid3DCU *grid, int is) { +arr3_double EMfields3D::getJysc(Grid3DCU *grid, int is) { array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jys); @@ -3610,7 +3609,7 @@ double ***EMfields3D::getJysc(Grid3DCU *grid, int is) { return arr; } /*! get current component Z array species is cell without the ghost cells */ -double ***EMfields3D::getJzsc(Grid3DCU *grid, int is) { +arr3_double EMfields3D::getJzsc(Grid3DCU *grid, int is) { array4_double tmp(ns,nxc,nyc,nzc); grid->interpN2C(tmp, is, Jzs); diff --git a/include/EMfields3D.h b/include/EMfields3D.h index 8bdd954e..e4e6f624 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -203,12 +203,12 @@ class EMfields3D // :public Field // field components without ghost cells // - void getExc(arr3_double arr, Grid3DCU *grid); - void getEyc(arr3_double arr, Grid3DCU *grid); - void getEzc(arr3_double arr, Grid3DCU *grid); - void getBxc(arr3_double arr); - void getByc(arr3_double arr); - void getBzc(arr3_double arr); + arr3_double getExc(Grid3DCU *grid); + arr3_double getEyc(Grid3DCU *grid); + arr3_double getEzc(Grid3DCU *grid); + arr3_double getBxc(); + arr3_double getByc(); + arr3_double getBzc(); arr3_double getRHOc() { return rhoc; } arr3_double getRHOn() { return rhon; } @@ -221,7 +221,7 @@ class EMfields3D // :public Field double getRHOns(int X,int Y,int Z,int is)const{return rhons.get(is,X,Y,Z);} arr4_double getRHOns(){return rhons;} /* density on cells without ghost cells */ - void getRHOcs(arr3_double arr, Grid3DCU *grid, int is); + arr3_double getRHOcs(Grid3DCU 
*grid, int is); double getBx_ext(int X, int Y, int Z) const{return Bx_ext.get(X,Y,Z);} double getBy_ext(int X, int Y, int Z) const{return By_ext.get(X,Y,Z);} @@ -256,9 +256,9 @@ class EMfields3D // :public Field // get current for species in all cells except ghost // - void getJxsc(arr3_double arr, Grid3DCU *grid, int is); - void getJysc(arr3_double arr, Grid3DCU *grid, int is); - void getJzsc(arr3_double arr, Grid3DCU *grid, int is); + arr3_double getJxsc(Grid3DCU *grid, int is); + arr3_double getJysc(Grid3DCU *grid, int is); + arr3_double getJzsc(Grid3DCU *grid, int is); /*! get the electric field energy */ double getEenergy(); @@ -397,7 +397,7 @@ class EMfields3D // :public Field array3_double vectY; array3_double vectZ; array3_double divC; - //array3_double arr; + array3_double arr; /* temporary arrays for summing moments */ int sizeMomentsArray; Moments10 **moments10Array; From d20621b90e1f471ecc02bfaf5e5a7423aecfb488 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 15 Jan 2014 14:40:15 +0100 Subject: [PATCH 082/118] trying to vectorize moments acumulation --- fields/EMfields3D.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index d3d21653..7283eb2f 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -606,6 +606,7 @@ void EMfields3D::sumMoments_vectorized( // prevent threads from writing to the same location for(int cxmod2=0; cxmod2<2; cxmod2++) for(int cymod2=0; cymod2<2; cymod2++) + // each mesh cell is handled by its own thread #pragma omp for collapse(2) for(int cx=cxmod2;cx Date: Wed, 15 Jan 2014 16:19:26 +0100 Subject: [PATCH 083/118] vectorized summing moments --- fields/EMfields3D.cpp | 276 +++++++++++++++++++++++++++--------------- 1 file changed, 181 insertions(+), 95 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 7283eb2f..54bdf0ff 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -562,6 +562,110 
@@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop } } +inline void compute_moments(double momentsAcc[8][10][8], + int i, + int imod, + double const * const x, + double const * const y, + double const * const z, + double const * const u, + double const * const v, + double const * const w, + double const * const q, + double xstart, + double ystart, + double zstart, + double inv_dx, + double inv_dy, + double inv_dz, + int cx, + int cy, + int cz) +{ + // compute the quadratic moments of velocity + // + const double ui=u[i]; + const double vi=v[i]; + const double wi=w[i]; + const double uui=ui*ui; + const double uvi=ui*vi; + const double uwi=ui*wi; + const double vvi=vi*vi; + const double vwi=vi*wi; + const double wwi=wi*wi; + double velmoments[10]; + velmoments[0] = 1.; + velmoments[1] = ui; + velmoments[2] = vi; + velmoments[3] = wi; + velmoments[4] = uui; + velmoments[5] = uvi; + velmoments[6] = uwi; + velmoments[7] = vvi; + velmoments[8] = vwi; + velmoments[9] = wwi; + + // compute the weights to distribute the moments + // + double weights[8]; + const double abs_xpos = x[i]; + const double abs_ypos = y[i]; + const double abs_zpos = z[i]; + const double rel_xpos = abs_xpos - xstart; + const double rel_ypos = abs_ypos - ystart; + const double rel_zpos = abs_zpos - zstart; + const double cxm1_pos = rel_xpos * inv_dx; + const double cym1_pos = rel_ypos * inv_dy; + const double czm1_pos = rel_zpos * inv_dz; + //if(true) + //{ + // const int cx_inf = int(floor(cxm1_pos)); + // const int cy_inf = int(floor(cym1_pos)); + // const int cz_inf = int(floor(czm1_pos)); + // assert_eq(cx-1,cx_inf); + // assert_eq(cy-1,cy_inf); + // assert_eq(cz-1,cz_inf); + //} + // fraction of the distance from the right of the cell + const double w1x = cx - cxm1_pos; + const double w1y = cy - cym1_pos; + const double w1z = cz - czm1_pos; + // fraction of distance from the left + const double w0x = 1-w1x; + const double w0y = 1-w1y; + const double w0z = 1-w1z; 
+ // we are calculating a charge moment. + const double qi=q[i]; + const double weight0 = qi*w0x; + const double weight1 = qi*w1x; + const double weight00 = weight0*w0y; + const double weight01 = weight0*w1y; + const double weight10 = weight1*w0y; + const double weight11 = weight1*w1y; + weights[0] = weight00*w0z; // weight000 + weights[1] = weight00*w1z; // weight001 + weights[2] = weight01*w0z; // weight010 + weights[3] = weight01*w1z; // weight011 + weights[4] = weight10*w0z; // weight100 + weights[5] = weight10*w1z; // weight101 + weights[6] = weight11*w0z; // weight110 + weights[7] = weight11*w1z; // weight111 + + // add particle to moments + { + // which is the superior order for the following loop? + for(int c=0; c<8; c++) + for(int m=0; m<10; m++) + { + momentsAcc[c][m][imod] += velmoments[m]*weights[c]; + //momentsArray[c][m] += velmoments[m]*weights[c]; + // When simd above is uncommented, + // the following statement prevents segmentation fault + //assert_isnum(momentsArray[c][m]); + } + } +} + void EMfields3D::sumMoments_vectorized( const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct) { @@ -637,111 +741,93 @@ void EMfields3D::sumMoments_vectorized( // accumulator for moments per each of 8 threads double momentsAcc[8][10][8]; - for(int c=0; c<8; c++) - for(int m=0; m<10; m++) - for(int i=0; i<8; i++) - { - momentsAcc[c][m][i] = 0; - } const int numpcls_in_cell = pcls.get_numpcls_in_bucket(cx,cy,cz); const int bucket_offset = pcls.get_bucket_offset(cx,cy,cz); const int bucket_end = bucket_offset+numpcls_in_cell; - // Why does uncommenting here cause a segmentation fault below on xeon? 
- //#pragma simd - for(int i=bucket_offset; i= aligned_end) { - // compute the quadratic moments of velocity - // - const double ui=u[i]; - const double vi=v[i]; - const double wi=w[i]; - const double uui=ui*ui; - const double uvi=ui*vi; - const double uwi=ui*wi; - const double vvi=vi*vi; - const double vwi=vi*wi; - const double wwi=wi*wi; - double velmoments[10]; - velmoments[0] = 1.; - velmoments[1] = ui; - velmoments[2] = vi; - velmoments[3] = wi; - velmoments[4] = uui; - velmoments[5] = uvi; - velmoments[6] = uwi; - velmoments[7] = vvi; - velmoments[8] = vwi; - velmoments[9] = wwi; - - // compute the weights to distribute the moments - // - double weights[8]; - const double abs_xpos = x[i]; - const double abs_ypos = y[i]; - const double abs_zpos = z[i]; - const double rel_xpos = abs_xpos - xstart; - const double rel_ypos = abs_ypos - ystart; - const double rel_zpos = abs_zpos - zstart; - const double cxm1_pos = rel_xpos * inv_dx; - const double cym1_pos = rel_ypos * inv_dy; - const double czm1_pos = rel_zpos * inv_dz; - //if(true) + for(int c=0; c<8; c++) + for(int m=0; m<10; m++) + { + momentsAcc[c][m][0] = 0; + } + for(int i=bucket_offset; i Date: Wed, 15 Jan 2014 14:40:15 +0100 Subject: [PATCH 084/118] trying to vectorize moments acumulation --- fields/EMfields3D.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index ed247f17..311eabd1 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -605,6 +605,7 @@ void EMfields3D::sumMoments_vectorized( // prevent threads from writing to the same location for(int cxmod2=0; cxmod2<2; cxmod2++) for(int cymod2=0; cymod2<2; cymod2++) + // each mesh cell is handled by its own thread #pragma omp for collapse(2) for(int cx=cxmod2;cx Date: Wed, 15 Jan 2014 16:19:26 +0100 Subject: [PATCH 085/118] vectorized summing moments --- fields/EMfields3D.cpp | 276 +++++++++++++++++++++++++++--------------- 1 file changed, 181 
insertions(+), 95 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 311eabd1..244518fb 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -561,6 +561,110 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop } } +inline void compute_moments(double momentsAcc[8][10][8], + int i, + int imod, + double const * const x, + double const * const y, + double const * const z, + double const * const u, + double const * const v, + double const * const w, + double const * const q, + double xstart, + double ystart, + double zstart, + double inv_dx, + double inv_dy, + double inv_dz, + int cx, + int cy, + int cz) +{ + // compute the quadratic moments of velocity + // + const double ui=u[i]; + const double vi=v[i]; + const double wi=w[i]; + const double uui=ui*ui; + const double uvi=ui*vi; + const double uwi=ui*wi; + const double vvi=vi*vi; + const double vwi=vi*wi; + const double wwi=wi*wi; + double velmoments[10]; + velmoments[0] = 1.; + velmoments[1] = ui; + velmoments[2] = vi; + velmoments[3] = wi; + velmoments[4] = uui; + velmoments[5] = uvi; + velmoments[6] = uwi; + velmoments[7] = vvi; + velmoments[8] = vwi; + velmoments[9] = wwi; + + // compute the weights to distribute the moments + // + double weights[8]; + const double abs_xpos = x[i]; + const double abs_ypos = y[i]; + const double abs_zpos = z[i]; + const double rel_xpos = abs_xpos - xstart; + const double rel_ypos = abs_ypos - ystart; + const double rel_zpos = abs_zpos - zstart; + const double cxm1_pos = rel_xpos * inv_dx; + const double cym1_pos = rel_ypos * inv_dy; + const double czm1_pos = rel_zpos * inv_dz; + //if(true) + //{ + // const int cx_inf = int(floor(cxm1_pos)); + // const int cy_inf = int(floor(cym1_pos)); + // const int cz_inf = int(floor(czm1_pos)); + // assert_eq(cx-1,cx_inf); + // assert_eq(cy-1,cy_inf); + // assert_eq(cz-1,cz_inf); + //} + // fraction of the distance from the right of the cell + const double w1x = cx - cxm1_pos; 
+ const double w1y = cy - cym1_pos; + const double w1z = cz - czm1_pos; + // fraction of distance from the left + const double w0x = 1-w1x; + const double w0y = 1-w1y; + const double w0z = 1-w1z; + // we are calculating a charge moment. + const double qi=q[i]; + const double weight0 = qi*w0x; + const double weight1 = qi*w1x; + const double weight00 = weight0*w0y; + const double weight01 = weight0*w1y; + const double weight10 = weight1*w0y; + const double weight11 = weight1*w1y; + weights[0] = weight00*w0z; // weight000 + weights[1] = weight00*w1z; // weight001 + weights[2] = weight01*w0z; // weight010 + weights[3] = weight01*w1z; // weight011 + weights[4] = weight10*w0z; // weight100 + weights[5] = weight10*w1z; // weight101 + weights[6] = weight11*w0z; // weight110 + weights[7] = weight11*w1z; // weight111 + + // add particle to moments + { + // which is the superior order for the following loop? + for(int c=0; c<8; c++) + for(int m=0; m<10; m++) + { + momentsAcc[c][m][imod] += velmoments[m]*weights[c]; + //momentsArray[c][m] += velmoments[m]*weights[c]; + // When simd above is uncommented, + // the following statement prevents segmentation fault + //assert_isnum(momentsArray[c][m]); + } + } +} + void EMfields3D::sumMoments_vectorized( const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct) { @@ -636,111 +740,93 @@ void EMfields3D::sumMoments_vectorized( // accumulator for moments per each of 8 threads double momentsAcc[8][10][8]; - for(int c=0; c<8; c++) - for(int m=0; m<10; m++) - for(int i=0; i<8; i++) - { - momentsAcc[c][m][i] = 0; - } const int numpcls_in_cell = pcls.get_numpcls_in_bucket(cx,cy,cz); const int bucket_offset = pcls.get_bucket_offset(cx,cy,cz); const int bucket_end = bucket_offset+numpcls_in_cell; - // Why does uncommenting here cause a segmentation fault below on xeon? 
- //#pragma simd - for(int i=bucket_offset; i= aligned_end) { - // compute the quadratic moments of velocity - // - const double ui=u[i]; - const double vi=v[i]; - const double wi=w[i]; - const double uui=ui*ui; - const double uvi=ui*vi; - const double uwi=ui*wi; - const double vvi=vi*vi; - const double vwi=vi*wi; - const double wwi=wi*wi; - double velmoments[10]; - velmoments[0] = 1.; - velmoments[1] = ui; - velmoments[2] = vi; - velmoments[3] = wi; - velmoments[4] = uui; - velmoments[5] = uvi; - velmoments[6] = uwi; - velmoments[7] = vvi; - velmoments[8] = vwi; - velmoments[9] = wwi; - - // compute the weights to distribute the moments - // - double weights[8]; - const double abs_xpos = x[i]; - const double abs_ypos = y[i]; - const double abs_zpos = z[i]; - const double rel_xpos = abs_xpos - xstart; - const double rel_ypos = abs_ypos - ystart; - const double rel_zpos = abs_zpos - zstart; - const double cxm1_pos = rel_xpos * inv_dx; - const double cym1_pos = rel_ypos * inv_dy; - const double czm1_pos = rel_zpos * inv_dz; - //if(true) + for(int c=0; c<8; c++) + for(int m=0; m<10; m++) + { + momentsAcc[c][m][0] = 0; + } + for(int i=bucket_offset; i Date: Thu, 16 Jan 2014 23:03:07 +0100 Subject: [PATCH 086/118] improved thread rection of sumMoments() and simplified sumMoments_vectorized() --- fields/EMfields3D.cpp | 323 ++++++++++++++++++++++++++------------ include/EMfields3D.h | 1 + particles/Particles3D.cpp | 48 +++--- 3 files changed, 251 insertions(+), 121 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 244518fb..bae8cc36 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -516,39 +516,63 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop // reduction if(!thread_num) timeTasks_begin_task(TimeTasks::MOMENT_REDUCTION); - // reduce arrays + // reduce moments in parallel + // + for(int thread_num=0;thread_num= aligned_end) + bool vectorized=false; + if(!vectorized) { - for(int c=0; c<8; 
c++) - for(int m=0; m<10; m++) - { - momentsAcc[c][m][0] = 0; - } + // accumulators for moments per each of 8 threads + double momentsAcc[8][10]; + memset(momentsAcc,0,sizeof(double)*8*10); for(int i=bucket_offset; i Date: Wed, 22 Jan 2014 15:51:38 +0100 Subject: [PATCH 087/118] implemented mover_PC_AoS (array of structs particles) --- fields/EMfields3D.cpp | 2 + iPic3D.cpp | 1 + include/Alloc.h | 6 + include/Grid3DCU.h | 6 +- include/PSKOutput.h | 2 +- include/Particle.h | 115 +++++++++++++ include/Particles3D.h | 3 + include/Particles3Dcomm.h | 20 ++- include/arraysfwd.h | 3 + include/iPic3D.h | 4 +- include/ipicmath.h | 47 ++++++ main/Parameters.cpp | 2 +- main/iPic3Dlib.cpp | 44 ++++- particles/Particles3D.cpp | 310 +++++++++++++++++++++++++++++++++- particles/Particles3Dcomm.cpp | 69 ++++++++ scripts/ipic.py | 7 + 16 files changed, 625 insertions(+), 16 deletions(-) create mode 100644 include/Particle.h create mode 100644 include/ipicmath.h diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index bae8cc36..c5af87f0 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -395,6 +395,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop for (int i = 0; i < ns; i++) { const Particles3Dcomm& pcls = part[i]; + assert_eq(pcls.get_particleType(), ParticleType::AoS); const int is = pcls.get_ns(); assert_eq(i,is); @@ -846,6 +847,7 @@ void EMfields3D::sumMoments_vectorized( for (int species_idx = 0; species_idx < ns; species_idx++) { const Particles3Dcomm& pcls = part[species_idx]; + assert_eq(pcls.get_particleType(), ParticleType::SoA); const int is = pcls.get_ns(); assert_eq(species_idx,is); diff --git a/iPic3D.cpp b/iPic3D.cpp index 4e768a71..91129c7b 100644 --- a/iPic3D.cpp +++ b/iPic3D.cpp @@ -34,6 +34,7 @@ int main(int argc, char **argv) { i = KCode.LastCycle() + 1; } + KCode.convertParticlesToSoA(); KCode.WriteOutput(i); KCode.WriteConserved(i); KCode.WriteRestart(i); diff --git a/include/Alloc.h b/include/Alloc.h 
index 63100dc3..cdb56e55 100644 --- a/include/Alloc.h +++ b/include/Alloc.h @@ -74,6 +74,12 @@ #define AlignedFree(S) (delete[] S) #define AlignedAlloc(T, NUM) (new T[NUM]) #endif +inline bool is_aligned(void *p, int N) +{ + return (unsigned long)p % N == 0; +} +#define assert_aligned(X, N) assert(is_aligned(X, N)); + // Compile with -DCHECK_BOUNDS to turn on bounds checking. //#define CHECK_BOUNDS diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index 03b56c1d..72dfc246 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -170,9 +170,9 @@ class Grid3DCU // :public Grid // coordinate accessors // // calculated equivalents (preferred for accelerator?): - //const double &calcXN(int X) { return xStart+(X-1)*dx;} - //const double &calcYN(int Y) { return yStart+(Y-1)*dy;} - //const double &calcZN(int Z) { return zStart+(Z-1)*dz;} + const double calcXN(int X) { return xStart+(X-1)*dx;} + const double calcYN(int Y) { return yStart+(Y-1)*dy;} + const double calcZN(int Z) { return zStart+(Z-1)*dz;} const pfloat &get_pfloat_XN(int X) { return pfloat_node_xcoord[X];} const pfloat &get_pfloat_YN(int Y) { return pfloat_node_ycoord[Y];} const pfloat &get_pfloat_ZN(int Z) { return pfloat_node_zcoord[Z];} diff --git a/include/PSKOutput.h b/include/PSKOutput.h index 624fad4c..89ee1d30 100644 --- a/include/PSKOutput.h +++ b/include/PSKOutput.h @@ -15,7 +15,7 @@ developers: D. Burgess, June/July 2006 #include "errors.h" #include "PSKException.h" -#include "Particles.h" +#include "Particles3Dcomm.h" #include "Field.h" #include "Grid.h" #include "Collective.h" diff --git a/include/Particle.h b/include/Particle.h new file mode 100644 index 00000000..a17c5368 --- /dev/null +++ b/include/Particle.h @@ -0,0 +1,115 @@ +#ifndef _Particle_ +#define _Particle_ + +// Depends on width of vector unit; +// need to be known at compile time. 
+// +#define AoS_PCLS_AT_A_TIME 2 + +namespace ParticleType +{ + enum Type + { + AoS = 0, + SoA + }; +} + +// intended to occupy 64 bytes +// +// particle for a specific species +class SpeciesParticle +{ + long long ID; + double x[3]; + double u[3]; + double q; + public: + // accessors + long long get_ID()const{ return ID; } + double get_x(int i)const{ return x[i]; } + double get_u(int i)const{ return u[i]; } + double get_q()const{ return q; } + void set_ID(long long in){ ID=in; } + void set_x(int i, double in) { x[i] = in; } + void set_u(int i, double in) { u[i] = in; } + void set_q(double in) { q = in; } + // alternative accessors + double get_x()const{ return x[0]; } + double get_y()const{ return x[1]; } + double get_z()const{ return x[2]; } + double get_u()const{ return u[0]; } + double get_v()const{ return u[1]; } + double get_w()const{ return u[2]; } + void set_x(double in){ x[0]=in; } + void set_y(double in){ x[1]=in; } + void set_z(double in){ x[2]=in; } + void set_u(double in){ u[0]=in; } + void set_v(double in){ u[1]=in; } + void set_w(double in){ u[2]=in; } + void set(long long _ID, + double _x, double _y, double _z, + double _u, double _v, double _w, + double _q) + { + ID = _ID; + x[0] = _x; x[1] = _y; x[2] = _z; + u[0] = _u; u[1] = _v; u[2] = _w; + q = _q; + } +}; + +// intended to occupy 64 bytes +// +// to be used when sorting with every particle advance +struct CellParticle +{ + long long ID; // 8 bytes + int cx[3]; // mesh cell + float fx[3]; // mesh cell position (fraction) + float u[3]; + float fxavg[3]; // for implicit push + float q; // float m would be better for stitching to MHD for dusty plasma + float qom; // for dusty plasma + public: + // accessors + // + // read access + long long get_ID()const{ return ID; } + float get_fx()const{ return fx[0]; } + float get_fy()const{ return fx[1]; } + float get_fz()const{ return fx[2]; } + float get_u()const{ return u[0]; } + float get_v()const{ return u[1]; } + float get_w()const{ return u[2]; } + 
float get_q()const{ return q; } + void set_ID(long long in){ ID=in; } + // write access + void set_u(float in){ u[0]=in; } + void set_v(float in){ u[1]=in; } + void set_w(float in){ u[2]=in; } + + void init(const SpeciesParticle& pcl, + double cxstart[3], // starting position of cell coordinates + float dx_inv[3], + float _qom) + { + ID = pcl.get_ID(); + // position in mesh coordinates + // + + float xpos[3]; + for(int i=0;i<3;i++) + { + float xpos = (pcl.get_x(i)-cxstart[i])*dx_inv[i]; + float cxpos = floor(xpos); + cx[i] = int(cxpos); + fxavg[i] = fx[i] = cxpos - cx[i]; + u[i] = pcl.get_u(i); + } + q = pcl.get_q(); + qom = _qom; + } +}; + +#endif diff --git a/include/Particles3D.h b/include/Particles3D.h index ccd210b4..05a8701a 100644 --- a/include/Particles3D.h +++ b/include/Particles3D.h @@ -58,6 +58,9 @@ class Particles3D:public Particles3Dcomm { void mover_explicit(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** mover with a Predictor-Corrector Scheme */ void mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf); + /** array-of-structs version of mover_PC */ + void mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EMf); + void mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** vectorized version of mover_PC **/ void mover_PC_vectorized(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** communicate particle after moving them */ diff --git a/include/Particles3Dcomm.h b/include/Particles3Dcomm.h index 5d40cca8..4b6fb098 100644 --- a/include/Particles3Dcomm.h +++ b/include/Particles3Dcomm.h @@ -7,7 +7,11 @@ developers: Stefano Markidis, Giovanni Lapenta #ifndef Part3DCOMM_H #define Part3DCOMM_H -#include "Particles.h" +#include "CollectiveIO.h" +#include "VirtualTopology3D.h" +#include "Grid.h" +#include "Field.h" +#include "Particle.h" /** * * class for particles of the same species with communications methods @@ -59,6 +63,14 @@ class Particles3Dcomm // :public Particles /** calculate the weights given the position 
of particles */ // void calculateWeights(double*** weight, double xp, double yp, double zp,int ix, int iy, int iz, Grid* grid); + private: + void copyParticlesToAoS(); + void copyParticlesToSoA(); + + public: + void convertParticlesToAoS(); + void convertParticlesToSoA(); + /*! sort particles for vectorized push (needs to be parallelized) */ void sort_particles_serial(Grid * grid, VirtualTopology3D * vct); /*! sort particles with respect to provided position data */ @@ -108,6 +120,7 @@ class Particles3Dcomm // :public Particles // inline get accessors // + ParticleType::Type get_particleType()const { return particleType; } double *getXall() const { return (x); } double *getYall() const { return (y); } double *getZall() const { return (z); } @@ -185,8 +198,13 @@ class Particles3Dcomm // :public Particles /** w0 Drift velocity - Direction Z */ double w0; + ParticleType::Type particleType; // particles data // + // AoS representation + SpeciesParticle *pcls; + // SoA representation + // /** Positions array - X component */ double *x; /** Positions array - Y component */ diff --git a/include/arraysfwd.h b/include/arraysfwd.h index 706c057d..86dbd5d8 100644 --- a/include/arraysfwd.h +++ b/include/arraysfwd.h @@ -38,6 +38,9 @@ typedef iPic3D::array_ref2 arr2_int; typedef iPic3D::array_ref3 arr3_int; typedef iPic3D::array_ref4 arr4_int; // +typedef iPic3D::const_array_ref3 const_arr3_ptr; +typedef iPic3D::array_ref3 arr3_ptr; +// typedef iPic3D::const_array_ref3 const_arr3_double; typedef iPic3D::const_array_ref4 const_arr4_double; typedef iPic3D::const_array_ref4 const_arr4_pfloat; diff --git a/include/iPic3D.h b/include/iPic3D.h index 93db7d11..6d7d1063 100644 --- a/include/iPic3D.h +++ b/include/iPic3D.h @@ -47,6 +47,9 @@ namespace iPic3D { inline int LastCycle(); inline int get_myrank(); + void convertParticlesToSoA(); + void convertParticlesToAoS(); + private: static MPIdata * mpi; Collective *col; @@ -96,7 +99,6 @@ namespace iPic3D { inline int 
c_Solver::get_myrank() { return (myrank); } - } #endif diff --git a/include/ipicmath.h b/include/ipicmath.h new file mode 100644 index 00000000..edf0ca5e --- /dev/null +++ b/include/ipicmath.h @@ -0,0 +1,47 @@ +#ifndef _ipicmath_h_ +#define _ipicmath_h_ + +// valid if roundup power is representable. +inline int +pow2roundup (int x) +{ + assert(x>=0); + //if (x < 0) + // return 0; + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return x+1; +} + +// does not work if highest non-sign bit is set +inline int +pow2rounddown (int x) +{ + assert(x>=0); + //if (x < 0) + // return 0; + + // set all bits below highest bit + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + // set the bit higher than the highest bit + x++; + // shift it down and return it + return (x >> 1); +} + +// round n up to next multiple of m +inline int roundup_to_multiple(int n, int m) +{ + //return ((n-1)/m+1)*m; + return (n+m-1)/m*m; +} + +#endif diff --git a/main/Parameters.cpp b/main/Parameters.cpp index 77abb87e..9a51e13f 100644 --- a/main/Parameters.cpp +++ b/main/Parameters.cpp @@ -12,7 +12,7 @@ void Parameters::init_parameters() //bool Parameters::get_SORTING_PARTICLES() { return SORTING_PARTICLES; } bool Parameters::get_SORTING_PARTICLES() { return true; } bool Parameters::get_VECTORIZE_MOMENTS() { return true; } -bool Parameters::get_VECTORIZE_MOVER() { return true; } +bool Parameters::get_VECTORIZE_MOVER() { return false; } // this must also return true if we communicate particles per iteration //bool Parameters::get_USING_XAVG() { return get_VECTORIZE_MOVER(); } bool Parameters::get_USING_XAVG() { return get_SORTING_PARTICLES(); } diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 4d8c532d..79dce395 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -156,7 +156,7 @@ int c_Solver::Init(int argc, char **argv) { my_file.close(); } cqsat = SaveDirName + "/VirtualSatelliteTraces" + num_proc.str() + ".txt"; - // 
if(myrank==0){ + // if(myrank==0) ofstream my_file(cqsat.c_str(), fstream::binary); nsat = 3; for (int isat = 0; isat < nsat; isat++) { @@ -178,6 +178,8 @@ int c_Solver::Init(int argc, char **argv) { void c_Solver::CalculateMoments() { + convertParticlesToSoA(); + timeTasks_set_main_task(TimeTasks::MOMENTS); EMf->updateInfoFields(grid,vct,col); @@ -199,11 +201,13 @@ void c_Solver::CalculateMoments() { { // since particles are sorted, // we can vectorize interpolation of particles to grid + convertParticlesToSoA(); EMf->sumMoments_vectorized(part, grid, vct); } else { EMf->setZeroPrimaryMoments(); + convertParticlesToSoA(); EMf->sumMoments(part, grid, vct); } //for (int i = 0; i < ns; i++) @@ -259,12 +263,18 @@ bool c_Solver::ParticlesMover() { // should merely pass EMf->get_fieldForPcls() rather than EMf. // use the Predictor Corrector scheme to move particles if(Parameters::get_VECTORIZE_MOVER()) + { part[i].mover_PC_vectorized(grid, vct, EMf); + } else - part[i].mover_PC(grid, vct, EMf); + { + //part[i].mover_PC(grid, vct, EMf); + //part[i].mover_PC_AoS2(grid, vct, EMf); + part[i].mover_PC_AoS(grid, vct, EMf); + } } } - for (int i = 0; i < ns; i++) // move each species + for (int i = 0; i < ns; i++) // communicate each species { mem_avail = part[i].communicate_particles(vct); } @@ -350,22 +360,29 @@ void c_Solver::WriteConserved(int cycle) { } void c_Solver::WriteOutput(int cycle) { - // OUTPUT to large file, called proc** + + bool write_fields = (cycle % (col->getFieldOutputCycle()) == 0 || cycle == first_cycle); + + bool write_particles = (cycle % (col->getParticlesOutputCycle()) == 0 + && col->getParticlesOutputCycle() != 1); + + if(write_particles){ convertParticlesToSoA(); } if (col->getWriteMethod() == "Parallel") { - if (cycle % (col->getFieldOutputCycle()) == 0 || cycle == first_cycle) { + if (write_fields) { WriteOutputParallel(grid, EMf, col, vct, cycle); } } else { - if (cycle % (col->getFieldOutputCycle()) == 0 || cycle == first_cycle) { + // OUTPUT 
to large file, called proc** + if (write_fields) { hdf5_agent.open_append(SaveDirName + "/proc" + num_proc.str() + ".hdf"); output_mgr.output("Eall + Ball + rhos + Jsall + pressure", cycle); // Pressure tensor is available hdf5_agent.close(); } - if (cycle % (col->getParticlesOutputCycle()) == 0 && col->getParticlesOutputCycle() != 1) { + if (write_particles) { hdf5_agent.open_append(SaveDirName + "/proc" + num_proc.str() + ".hdf"); output_mgr.output("position + velocity + q ", cycle, 1); hdf5_agent.close(); @@ -410,3 +427,16 @@ void c_Solver::Finalize() { mpi->finalize_mpi(); } +// convert particles to struct of arrays (assumed by I/O) +void c_Solver::convertParticlesToSoA() +{ + for (int i = 0; i < ns; i++) + part[i].convertParticlesToSoA(); +} + +// convert particles to array of structs (used in computing) +void c_Solver::convertParticlesToAoS() +{ + for (int i = 0; i < ns; i++) + part[i].convertParticlesToAoS(); +} diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 3cc55c8e..1d1f567b 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -312,6 +312,7 @@ void Particles3D::mover_explicit(Grid * grid, VirtualTopology3D * vct, Field * E } /** mover with a Predictor-Corrector scheme */ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { + convertParticlesToSoA(); #pragma omp master if (vct->getCartesian_rank() == 0) { cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; @@ -456,10 +457,314 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } } +void Particles3D::mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EMf) +{ + convertParticlesToAoS(); + #pragma omp master + if (vct->getCartesian_rank() == 0) { + cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; + } + const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + + #pragma 
omp master + { timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); } + const pfloat dto2 = .5 * dt, qdto2mc = qom * dto2 / c; + #pragma omp for schedule(static) + for (int pidx = 0; pidx < nop; pidx++) { + // copy the particle + SpeciesParticle& pcl = pcls[pidx]; + const pfloat xorig = pcl.get_x(); + const pfloat yorig = pcl.get_y(); + const pfloat zorig = pcl.get_z(); + const pfloat uorig = pcl.get_u(); + const pfloat vorig = pcl.get_v(); + const pfloat worig = pcl.get_w(); + pfloat xavg = xorig; + pfloat yavg = yorig; + pfloat zavg = zorig; + pfloat uavg; + pfloat vavg; + pfloat wavg; + // calculate the average velocity iteratively + for (int innter = 0; innter < NiterMover; innter++) { + // interpolation G-->P + const pfloat ixd = floor((xavg - xstart) * inv_dx); + const pfloat iyd = floor((yavg - ystart) * inv_dy); + const pfloat izd = floor((zavg - zstart) * inv_dz); + // interface of index to right of cell + int ix = 2 + int(ixd); + int iy = 2 + int(iyd); + int iz = 2 + int(izd); + + // use field data of closest cell in domain + // + if (ix < 1) ix = 1; + if (iy < 1) iy = 1; + if (iz < 1) iz = 1; + if (ix > nxc) ix = nxc; + if (iy > nyc) iy = nyc; + if (iz > nzc) iz = nzc; + // index of cell of particle; + //const int cx = ix - 1; + //const int cy = iy - 1; + //const int cz = iz - 1; + + const pfloat xi0 = xavg - grid->get_pfloat_XN(ix-1); + const pfloat eta0 = yavg - grid->get_pfloat_YN(iy-1); + const pfloat zeta0 = zavg - grid->get_pfloat_ZN(iz-1); + const pfloat xi1 = grid->get_pfloat_XN(ix) - xavg; + const pfloat eta1 = grid->get_pfloat_YN(iy) - yavg; + const pfloat zeta1 = grid->get_pfloat_ZN(iz) - zavg; + + pfloat Exl = 0.0; + pfloat Eyl = 0.0; + pfloat Ezl = 0.0; + pfloat Bxl = 0.0; + pfloat Byl = 0.0; + pfloat Bzl = 0.0; + + pfloat weights[8]; + const pfloat weight0 = invVOL*xi0; + const pfloat weight1 = invVOL*xi1; + const pfloat weight00 = weight0*eta0; + const pfloat weight01 = weight0*eta1; + const pfloat weight10 = weight1*eta0; + const pfloat 
weight11 = weight1*eta1; + weights[0] = weight00*zeta0; // weight000 + weights[1] = weight00*zeta1; // weight001 + weights[2] = weight01*zeta0; // weight010 + weights[3] = weight01*zeta1; // weight011 + weights[4] = weight10*zeta0; // weight100 + weights[5] = weight10*zeta1; // weight101 + weights[6] = weight11*zeta0; // weight110 + weights[7] = weight11*zeta1; // weight111 + //weights[0] = xi0 * eta0 * zeta0 * qi * invVOL; // weight000 + //weights[1] = xi0 * eta0 * zeta1 * qi * invVOL; // weight001 + //weights[2] = xi0 * eta1 * zeta0 * qi * invVOL; // weight010 + //weights[3] = xi0 * eta1 * zeta1 * qi * invVOL; // weight011 + //weights[4] = xi1 * eta0 * zeta0 * qi * invVOL; // weight100 + //weights[5] = xi1 * eta0 * zeta1 * qi * invVOL; // weight101 + //weights[6] = xi1 * eta1 * zeta0 * qi * invVOL; // weight110 + //weights[7] = xi1 * eta1 * zeta1 * qi * invVOL; // weight111 + + // creating these aliases seems to accelerate this method by about 30% + // on the Xeon host processor, suggesting a deficiency in the optimizer. 
+ // + arr1_pfloat_get field_components[8]; + field_components[0] = fieldForPcls[ix ][iy ][iz ]; // field000 + field_components[1] = fieldForPcls[ix ][iy ][iz-1]; // field001 + field_components[2] = fieldForPcls[ix ][iy-1][iz ]; // field010 + field_components[3] = fieldForPcls[ix ][iy-1][iz-1]; // field011 + field_components[4] = fieldForPcls[ix-1][iy ][iz ]; // field100 + field_components[5] = fieldForPcls[ix-1][iy ][iz-1]; // field101 + field_components[6] = fieldForPcls[ix-1][iy-1][iz ]; // field110 + field_components[7] = fieldForPcls[ix-1][iy-1][iz-1]; // field111 + + for(int c=0; c<8; c++) + { + Bxl += weights[c] * field_components[c][0]; + Byl += weights[c] * field_components[c][1]; + Bzl += weights[c] * field_components[c][2]; + Exl += weights[c] * field_components[c][3]; + Eyl += weights[c] * field_components[c][4]; + Ezl += weights[c] * field_components[c][5]; + } + const double Omx = qdto2mc*Bxl; + const double Omy = qdto2mc*Byl; + const double Omz = qdto2mc*Bzl; + + // end interpolation + const pfloat omsq = (Omx * Omx + Omy * Omy + Omz * Omz); + const pfloat denom = 1.0 / (1.0 + omsq); + // solve the position equation + const pfloat ut = uorig + qdto2mc * Exl; + const pfloat vt = vorig + qdto2mc * Eyl; + const pfloat wt = worig + qdto2mc * Ezl; + //const pfloat udotb = ut * Bxl + vt * Byl + wt * Bzl; + const pfloat udotOm = ut * Omx + vt * Omy + wt * Omz; + // solve the velocity equation + uavg = (ut + (vt * Omz - wt * Omy + udotOm * Omx)) * denom; + vavg = (vt + (wt * Omx - ut * Omz + udotOm * Omy)) * denom; + wavg = (wt + (ut * Omy - vt * Omx + udotOm * Omz)) * denom; + // update average position + xavg = xorig + uavg * dto2; + yavg = yorig + vavg * dto2; + zavg = zorig + wavg * dto2; + } // end of iteration + // update the final position and velocity + pcl.set_x(xorig + uavg * dt); + pcl.set_y(yorig + vavg * dt); + pcl.set_z(zorig + wavg * dt); + pcl.set_u(2.0 * uavg - uorig); + pcl.set_v(2.0 * vavg - vorig); + pcl.set_w(2.0 * wavg - worig); + } + 
#pragma omp master + { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } +} +void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf) +{ + convertParticlesToAoS(); + #pragma omp master + if (vct->getCartesian_rank() == 0) { + cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; + } + const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + + #pragma omp master + { timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); } + const double dto2 = .5 * dt, qdto2mc = qom * dto2 / c; + #pragma omp for schedule(static) + // why does single precision make no difference in execution speed? + //#pragma simd vectorlength(VECTOR_WIDTH) + for (int pidx = 0; pidx < nop; pidx++) { + // copy the particle + SpeciesParticle& pcl = pcls[pidx]; + const double xorig = pcl.get_x(); + const double yorig = pcl.get_y(); + const double zorig = pcl.get_z(); + const double uorig = pcl.get_u(); + const double vorig = pcl.get_v(); + const double worig = pcl.get_w(); + double xavg = xorig; + double yavg = yorig; + double zavg = zorig; + double uavg; + double vavg; + double wavg; + // calculate the average velocity iteratively + for (int innter = 0; innter < NiterMover; innter++) { + + // compute weights for field components + // + double weights[8]; + // xstart marks start of domain excluding ghosts + const double rel_xpos = xavg - xstart; + const double rel_ypos = yavg - ystart; + const double rel_zpos = zavg - zstart; + // cell position minus 1 (due to ghost cells) + const double cxm1_pos = rel_xpos * inv_dx; + const double cym1_pos = rel_ypos * inv_dy; + const double czm1_pos = rel_zpos * inv_dz; + // + int cx = 1 + int(floor(cxm1_pos)); + int cy = 1 + int(floor(cym1_pos)); + int cz = 1 + int(floor(czm1_pos)); + + // if the cell is outside the domain, then treat it as + // in the nearest ghost cell. 
+ // + if (cx < 0) cx = 0; + if (cy < 0) cy = 0; + if (cz < 0) cz = 0; + // number of cells in x direction including ghosts is nxc + if (cx >= nxc) cx = nxc-1; + if (cy >= nyc) cy = nyc-1; + if (cz >= nzc) cz = nzc-1; + + // index of interface to right of cell + const int ix = cx + 1; + const int iy = cy + 1; + const int iz = cz + 1; + + // fraction of the distance from the right of the cell + const double w1x = cx - cxm1_pos; + const double w1y = cy - cym1_pos; + const double w1z = cz - czm1_pos; + // fraction of distance from the left + const double w0x = 1-w1x; + const double w0y = 1-w1y; + const double w0z = 1-w1z; + //const double weight00 = w0x*w0y; + //const double weight01 = w0x*w1y; + //const double weight10 = w1x*w0y; + //const double weight11 = w1x*w1y; + //weights[0] = weight00*w0z; // weight000 + //weights[1] = weight00*w1z; // weight001 + //weights[2] = weight01*w0z; // weight010 + //weights[3] = weight01*w1z; // weight011 + //weights[4] = weight10*w0z; // weight100 + //weights[5] = weight10*w1z; // weight101 + //weights[6] = weight11*w0z; // weight110 + //weights[7] = weight11*w1z; // weight111 + // + weights[0] = w0x*w0y*w0z; // weight000 + weights[1] = w0x*w0y*w1z; // weight001 + weights[2] = w0x*w1y*w0z; // weight010 + weights[3] = w0x*w1y*w1z; // weight011 + weights[4] = w1x*w0y*w0z; // weight100 + weights[5] = w1x*w0y*w1z; // weight101 + weights[6] = w1x*w1y*w0z; // weight110 + weights[7] = w1x*w1y*w1z; // weight111 + + pfloat Exl = 0.0; + pfloat Eyl = 0.0; + pfloat Ezl = 0.0; + pfloat Bxl = 0.0; + pfloat Byl = 0.0; + pfloat Bzl = 0.0; + + // creating these aliases seems to accelerate this method by about 30% + // on the Xeon host, processor, suggesting deficiency in the optimizer. 
+ // + arr1_pfloat_get field_components[8]; + field_components[0] = fieldForPcls[ix][iy][iz]; // field000 + field_components[1] = fieldForPcls[ix][iy][cz]; // field001 + field_components[2] = fieldForPcls[ix][cy][iz]; // field010 + field_components[3] = fieldForPcls[ix][cy][cz]; // field011 + field_components[4] = fieldForPcls[cx][iy][iz]; // field100 + field_components[5] = fieldForPcls[cx][iy][cz]; // field101 + field_components[6] = fieldForPcls[cx][cy][iz]; // field110 + field_components[7] = fieldForPcls[cx][cy][cz]; // field111 + + for(int c=0; c<8; c++) + { + Bxl += weights[c] * field_components[c][0]; + Byl += weights[c] * field_components[c][1]; + Bzl += weights[c] * field_components[c][2]; + Exl += weights[c] * field_components[c][3]; + Eyl += weights[c] * field_components[c][4]; + Ezl += weights[c] * field_components[c][5]; + } + const double Omx = qdto2mc*Bxl; + const double Omy = qdto2mc*Byl; + const double Omz = qdto2mc*Bzl; + + // end interpolation + const pfloat omsq = (Omx * Omx + Omy * Omy + Omz * Omz); + const pfloat denom = 1.0 / (1.0 + omsq); + // solve the position equation + const pfloat ut = uorig + qdto2mc * Exl; + const pfloat vt = vorig + qdto2mc * Eyl; + const pfloat wt = worig + qdto2mc * Ezl; + //const pfloat udotb = ut * Bxl + vt * Byl + wt * Bzl; + const pfloat udotOm = ut * Omx + vt * Omy + wt * Omz; + // solve the velocity equation + uavg = (ut + (vt * Omz - wt * Omy + udotOm * Omx)) * denom; + vavg = (vt + (wt * Omx - ut * Omz + udotOm * Omy)) * denom; + wavg = (wt + (ut * Omy - vt * Omx + udotOm * Omz)) * denom; + // update average position + xavg = xorig + uavg * dto2; + yavg = yorig + vavg * dto2; + zavg = zorig + wavg * dto2; + } // end of iteration + // update the final position and velocity + pcl.set_x(xorig + uavg * dt); + pcl.set_y(yorig + vavg * dt); + pcl.set_z(zorig + wavg * dt); + pcl.set_u(2.0 * uavg - uorig); + pcl.set_v(2.0 * vavg - vorig); + pcl.set_w(2.0 * wavg - worig); + } // END OF ALL THE PARTICLES + #pragma 
omp master + { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } +} + /** mover with a Predictor-Corrector scheme */ void Particles3D::mover_PC_vectorized( Grid * grid, VirtualTopology3D * vct, Field * EMf) { + convertParticlesToSoA(); assert_eq(nxc,nxn-1); assert_eq(nyc,nyn-1); assert_eq(nzc,nzn-1); @@ -535,8 +840,8 @@ void Particles3D::mover_PC_vectorized( ALIGNED(u); ALIGNED(v); ALIGNED(w); - // this should vectorize, but could be faster if particle - // data for each mesh cell were aligned. + // This pragma helps on Xeon but hurts on Xeon Phi. + // On the Phi we could accelerate by processing two particles at a time. #pragma simd //for(int pidx=bucket_offset_1d[cell]; pidxgetNp(species) / (vct->getNprocs()); np_tot = col->getNp(species); npmax = col->getNpMax(species) / (vct->getNprocs()); + // ensure that npmax is a multiple of AoS_PCLS_AT_A_TIME + npmax = roundup_to_multiple(npmax,AoS_PCLS_AT_A_TIME); qom = col->getQOM(species); uth = col->getUth(species); vth = col->getVth(species); @@ -178,6 +182,21 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 // ////////////////////////////////////////////////////////////// // ////////////// ALLOCATE ARRAYS ///////////////////////// // ////////////////////////////////////////////////////////////// + // + // AoS particle representation + // + // intel new allocates with 64-byte alignment + // since particles are 64 bytes wide, every particle + // is aligned. 
+ pcls = new SpeciesParticle[npmax]; + particleType = ParticleType::SoA; + #ifdef __INTEL_COMPILER + assert_eq(sizeof(Particle),64); + ALIGNED(pcls); + #endif + // + // SoA particle representation + // // positions x = new double[npmax]; y = new double[npmax]; @@ -1214,4 +1233,54 @@ void Particles3Dcomm::sort_particles_serial( //} //#endif +void Particles3Dcomm::copyParticlesToSoA() +{ + dprintf("copying to struct of arrays"); + #pragma omp for + for(int pidx=0; pidx Date: Tue, 21 Jan 2014 15:23:25 +0100 Subject: [PATCH 088/118] added compile options -g -xHost for Xeon --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f70f8451..28d0690d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "k1om") ## Xeon Phi #set(CMAKE_CXX_FLAGS "-openmp -g -mmic") # set flags for Xeon Phi, totalview elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") ## Xeon set(CMAKE_CXX_COMPILER "icpc") - set(CMAKE_CXX_FLAGS "-O3 -openmp -fno-exceptions -vec-report") + set(CMAKE_CXX_FLAGS "-O3 -openmp -g -xHost -fno-exceptions -vec-report") else() set(CMAKE_CXX_FLAGS "-O3") endif() From b947bb4c39cef810d15c019b9c60f88a2bd1938b Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 23 Jan 2014 00:42:51 +0100 Subject: [PATCH 089/118] fixed compiler bugs on MIC introduced in e1dba2f4abd --- include/Grid3DCU.h | 24 ++++++++++++------------ include/ipicmath.h | 1 + particles/Particles3Dcomm.cpp | 5 +++-- utility/debug.cpp | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index 72dfc246..c0a60e7e 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -170,18 +170,18 @@ class Grid3DCU // :public Grid // coordinate accessors // // calculated equivalents (preferred for accelerator?): - const double calcXN(int X) { return xStart+(X-1)*dx;} - const double calcYN(int Y) { return yStart+(Y-1)*dy;} - const double 
calcZN(int Z) { return zStart+(Z-1)*dz;} - const pfloat &get_pfloat_XN(int X) { return pfloat_node_xcoord[X];} - const pfloat &get_pfloat_YN(int Y) { return pfloat_node_ycoord[Y];} - const pfloat &get_pfloat_ZN(int Z) { return pfloat_node_zcoord[Z];} - const double &getXN(int X) { return node_xcoord[X];} - const double &getYN(int Y) { return node_ycoord[Y];} - const double &getZN(int Z) { return node_zcoord[Z];} - const double &getXC(int X) { return center_xcoord[X];} - const double &getYC(int Y) { return center_ycoord[Y];} - const double &getZC(int Z) { return center_zcoord[Z];} + double calcXN(int X)const{ return xStart+(X-1)*dx;} + double calcYN(int Y)const{ return yStart+(Y-1)*dy;} + double calcZN(int Z)const{ return zStart+(Z-1)*dz;} + const pfloat &get_pfloat_XN(int X)const{ return pfloat_node_xcoord[X];} + const pfloat &get_pfloat_YN(int Y)const{ return pfloat_node_ycoord[Y];} + const pfloat &get_pfloat_ZN(int Z)const{ return pfloat_node_zcoord[Z];} + const double &getXN(int X)const{ return node_xcoord[X];} + const double &getYN(int Y)const{ return node_ycoord[Y];} + const double &getZN(int Z)const{ return node_zcoord[Z];} + const double &getXC(int X)const{ return center_xcoord[X];} + const double &getYC(int Y)const{ return center_ycoord[Y];} + const double &getZC(int Z)const{ return center_zcoord[Z];} // // The following could be eliminated in favor of the previous // unless we truly anticipate generalizing to a deformed diff --git a/include/ipicmath.h b/include/ipicmath.h index edf0ca5e..a042ef92 100644 --- a/include/ipicmath.h +++ b/include/ipicmath.h @@ -1,5 +1,6 @@ #ifndef _ipicmath_h_ #define _ipicmath_h_ +#include "assert.h" // valid if roundup power is representable. inline int diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 446e305b..12adb178 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -25,6 +25,7 @@ developers: Stefano Markidis, Giovanni Lapenta. 
#include "ompdefs.h" #include "ipicmath.h" +#include "Particle.h" #include "Particles3Dcomm.h" #include "Parameters.h" @@ -191,7 +192,7 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 pcls = new SpeciesParticle[npmax]; particleType = ParticleType::SoA; #ifdef __INTEL_COMPILER - assert_eq(sizeof(Particle),64); + assert_eq(sizeof(SpeciesParticle),64); ALIGNED(pcls); #endif // @@ -1253,8 +1254,8 @@ void Particles3Dcomm::copyParticlesToSoA() void Particles3Dcomm::copyParticlesToAoS() { - #pragma omp for dprintf("copying to array of structs"); + #pragma omp for for(int pidx=0; pidx Date: Thu, 23 Jan 2014 01:47:49 +0100 Subject: [PATCH 090/118] added TimeTasks: TRANSPOSE_PCLS_TO_SOA and TRANSPOSE_PCLS_TO_AOS --- include/TimeTasks.h | 2 ++ particles/Particles3Dcomm.cpp | 3 +++ utility/TimeTasks.cpp | 2 ++ 3 files changed, 7 insertions(+) diff --git a/include/TimeTasks.h b/include/TimeTasks.h index 3ac67b23..ff07f26c 100644 --- a/include/TimeTasks.h +++ b/include/TimeTasks.h @@ -32,6 +32,8 @@ class TimeTasks MOMENT_REDUCTION, MOVER_PCL_SORTING, MOVER_PCL_MOVING, + TRANSPOSE_PCLS_TO_AOS, + TRANSPOSE_PCLS_TO_SOA, NUMBER_OF_TASKS // this line should be last }; diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 12adb178..4533266d 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -33,6 +33,7 @@ developers: Stefano Markidis, Giovanni Lapenta. 
#include #include #include "debug.h" +#include "TimeTasks.h" using std::cout; using std::cerr; @@ -1236,6 +1237,7 @@ void Particles3Dcomm::sort_particles_serial( void Particles3Dcomm::copyParticlesToSoA() { + timeTasks_set_task(TimeTasks::TRANSPOSE_PCLS_TO_SOA); dprintf("copying to struct of arrays"); #pragma omp for for(int pidx=0; pidx Date: Thu, 23 Jan 2014 01:49:25 +0100 Subject: [PATCH 091/118] restricted TimeTasks to master thread of rank 0 process --- include/TimeTasks.h | 37 ++++--------------------- utility/TimeTasks.cpp | 64 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 34 deletions(-) diff --git a/include/TimeTasks.h b/include/TimeTasks.h index ff07f26c..a00d8d76 100644 --- a/include/TimeTasks.h +++ b/include/TimeTasks.h @@ -120,16 +120,8 @@ class TimeTasks_caller_to_set_main_task_for_scope double start_time; TimeTasks::Tasks task; public: - TimeTasks_caller_to_set_main_task_for_scope(TimeTasks::Tasks _task) : - task(_task) - { - start_time = MPI_Wtime(); - timeTasks.start_main_task(task); - } - ~TimeTasks_caller_to_set_main_task_for_scope() - { - timeTasks.end_main_task(task, start_time); - } + TimeTasks_caller_to_set_main_task_for_scope(TimeTasks::Tasks _task); + ~TimeTasks_caller_to_set_main_task_for_scope(); }; class TimeTasks_caller_to_set_task_for_scope @@ -138,27 +130,8 @@ class TimeTasks_caller_to_set_task_for_scope double start_time; TimeTasks::Tasks task; public: - TimeTasks_caller_to_set_task_for_scope(TimeTasks::Tasks _task) : - task(_task) - { - already_active = timeTasks.is_active(task); - if(!already_active) - { - start_time = MPI_Wtime(); - timeTasks.start_task(task); - } - } - ~TimeTasks_caller_to_set_task_for_scope() - { - if(already_active) - { - assert(timeTasks.is_active(task)); - } - else - { - timeTasks.end_task(task, start_time); - } - } + TimeTasks_caller_to_set_task_for_scope(TimeTasks::Tasks _task); + ~TimeTasks_caller_to_set_task_for_scope(); }; class 
TimeTasks_caller_to_set_communication_mode_for_scope @@ -174,7 +147,7 @@ class TimeTasks_caller_to_set_communication_mode_for_scope // These macros could be changed to provide file and line number // // We need to create nonanonymous instances so that the destructor -// will not be called until the end of the scope, so we use the preprocessor. +// will not be called until the end of the scope, so we use the preprocessor // to generate unique names of nonanonymous instances. // #define timeTasks_set_main_task(task) \ diff --git a/utility/TimeTasks.cpp b/utility/TimeTasks.cpp index b37ac4e5..ec908d73 100644 --- a/utility/TimeTasks.cpp +++ b/utility/TimeTasks.cpp @@ -4,6 +4,7 @@ #include "TimeTasks.h" #include "asserts.h" #include "MPIdata.h" // for get_rank +#include "parallel.h" #include "debug.h" /** implementation of declarations in utility/TimeTasks.h **/ @@ -49,6 +50,7 @@ void TimeTasks::resetCycle() } void TimeTasks::start_main_task(TimeTasks::Tasks taskid) { + if(!is_output_thread()) return; assert(is_exclusive(taskid)); assert_ne(active_task, taskid); active_task = taskid; @@ -57,6 +59,7 @@ void TimeTasks::start_main_task(TimeTasks::Tasks taskid) } void TimeTasks::start_task(TimeTasks::Tasks taskid) { + if(!is_output_thread()) return; assert(!is_exclusive(taskid)); assert(!active[taskid]); active[taskid]=true; @@ -64,6 +67,7 @@ void TimeTasks::start_task(TimeTasks::Tasks taskid) // have to manage the task stack explicitly void TimeTasks::start_task(TimeTasks::Tasks taskid, double start_time) { + if(!is_output_thread()) return; if(stack_depth[taskid]==0) { start_times[taskid]=start_time; @@ -73,11 +77,13 @@ void TimeTasks::start_task(TimeTasks::Tasks taskid, double start_time) } void TimeTasks::end_main_task(TimeTasks::Tasks taskid, double start_time) { + if(!is_output_thread()) return; end_task(taskid, start_time); active_task = NONE; } void TimeTasks::end_task(TimeTasks::Tasks taskid, double start_time) { + if(!is_output_thread()) return; 
assert(active[taskid]); double now = MPI_Wtime(); // compute time spent on task @@ -87,6 +93,7 @@ void TimeTasks::end_task(TimeTasks::Tasks taskid, double start_time) // have to manage the task stack explicitly void TimeTasks::end_task(TimeTasks::Tasks taskid) { + if(!is_output_thread()) return; stack_depth[taskid]--; assert_ge(stack_depth[taskid],0); if(stack_depth[taskid]==0) @@ -96,6 +103,7 @@ void TimeTasks::end_task(TimeTasks::Tasks taskid) } void TimeTasks::end_communicating(double start_time) { + if(!is_output_thread()) return; assert(active_task); assert(communicating); double additional_communication_time = MPI_Wtime()-start_time; @@ -105,9 +113,10 @@ void TimeTasks::end_communicating(double start_time) #define TIMING_PREFIX "| " void TimeTasks::print_cycle_times(int cycle) { + if(!is_output_thread()) return; FILE* file = stdout; // we could report average for all processes - if(!MPIdata::get_rank()) + //if(!MPIdata::get_rank()) { fflush(file); fprintf(file,"=== times for cycle %d for rank %d === \n", @@ -147,9 +156,14 @@ void TimeTasks::print_cycle_times(int cycle) fprintf(file, TIMING_PREFIX "time subtask\n"); for(int e=LAST+1; e Date: Thu, 23 Jan 2014 05:27:13 +0100 Subject: [PATCH 092/118] created sumMoments_AoS(). code blowing up at 10 cycles. 
--- fields/EMfields3D.cpp | 167 +++++++++++++++++++++++++++++++++++++- include/EMfields3D.h | 1 + include/Particles3Dcomm.h | 1 + main/Parameters.cpp | 2 +- main/iPic3Dlib.cpp | 7 +- 5 files changed, 174 insertions(+), 4 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index c5af87f0..38f544c4 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -395,7 +395,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop for (int i = 0; i < ns; i++) { const Particles3Dcomm& pcls = part[i]; - assert_eq(pcls.get_particleType(), ParticleType::AoS); + assert_eq(pcls.get_particleType(), ParticleType::SoA); const int is = pcls.get_ns(); assert_eq(i,is); @@ -586,6 +586,171 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop } } +void EMfields3D::sumMoments_AoS( + const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct) +{ + const double inv_dx = 1.0 / dx; + const double inv_dy = 1.0 / dy; + const double inv_dz = 1.0 / dz; + const int nxn = grid->getNXN(); + const int nyn = grid->getNYN(); + const int nzn = grid->getNZN(); + const double xstart = grid->getXstart(); + const double ystart = grid->getYstart(); + const double zstart = grid->getZstart(); + // To make memory use scale to a large number of threads, we + // could first apply an efficient parallel sorting algorithm + // to the particles and then accumulate moments in smaller + // subarrays. 
+ //#ifdef _OPENMP + #pragma omp parallel + { + for (int species_idx = 0; species_idx < ns; species_idx++) + { + const Particles3Dcomm& pcls = part[species_idx]; + assert_eq(pcls.get_particleType(), ParticleType::AoS); + const int is = pcls.get_ns(); + assert_eq(species_idx,is); + + const int nop = pcls.getNOP(); + + int thread_num = omp_get_thread_num(); + { timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); } + Moments10& speciesMoments10 = fetch_moments10Array(thread_num); + arr4_double moments = speciesMoments10.fetch_arr(); + // + // moments.setmode(ompmode::mine); + // moments.setall(0.); + // + double *moments1d = &moments[0][0][0][0]; + int moments1dsize = moments.get_size(); + for(int i=0; igetXN(ix-1); + const double eta0 = pcl.get_y() - grid->getYN(iy-1); + const double zeta0 = pcl.get_z() - grid->getZN(iz-1); + const double xi1 = grid->getXN(ix) - pcl.get_x(); + const double eta1 = grid->getYN(iy) - pcl.get_y(); + const double zeta1 = grid->getZN(iz) - pcl.get_z(); + const double qi = pcl.get_q(); + const double invVOLqi = invVOL*qi; + const double weight0 = invVOLqi * xi0; + const double weight1 = invVOLqi * xi1; + const double weight00 = weight0*eta0; + const double weight01 = weight0*eta1; + const double weight10 = weight1*eta0; + const double weight11 = weight1*eta1; + double weights[8]; + weights[0] = weight00*zeta0; // weight000 + weights[1] = weight00*zeta1; // weight001 + weights[2] = weight01*zeta0; // weight010 + weights[3] = weight01*zeta1; // weight011 + weights[4] = weight10*zeta0; // weight100 + weights[5] = weight10*zeta1; // weight101 + weights[6] = weight11*zeta0; // weight110 + weights[7] = weight11*zeta1; // weight111 + //weights[0] = xi0 * eta0 * zeta0 * qi * invVOL; // weight000 + //weights[1] = xi0 * eta0 * zeta1 * qi * invVOL; // weight001 + //weights[2] = xi0 * eta1 * zeta0 * qi * invVOL; // weight010 + //weights[3] = xi0 * eta1 * zeta1 * qi * invVOL; // weight011 + //weights[4] = xi1 * eta0 * zeta0 * qi * invVOL; // weight100 
+ //weights[5] = xi1 * eta0 * zeta1 * qi * invVOL; // weight101 + //weights[6] = xi1 * eta1 * zeta0 * qi * invVOL; // weight110 + //weights[7] = xi1 * eta1 * zeta1 * qi * invVOL; // weight111 + + // add particle to moments + { + arr1_double_fetch momentsArray[8]; + arr2_double_fetch moments00 = moments[ix ][iy ]; + arr2_double_fetch moments01 = moments[ix ][iy-1]; + arr2_double_fetch moments10 = moments[ix-1][iy ]; + arr2_double_fetch moments11 = moments[ix-1][iy-1]; + momentsArray[0] = moments00[iz ]; // moments000 + momentsArray[1] = moments00[iz-1]; // moments001 + momentsArray[2] = moments01[iz ]; // moments010 + momentsArray[3] = moments01[iz-1]; // moments011 + momentsArray[4] = moments10[iz ]; // moments100 + momentsArray[5] = moments10[iz-1]; // moments101 + momentsArray[6] = moments11[iz ]; // moments110 + momentsArray[7] = moments11[iz-1]; // moments111 + + for(int m=0; m<10; m++) + for(int c=0; c<8; c++) + { + momentsArray[c][m] += velmoments[m]*weights[c]; + } + } + } + if(!thread_num) timeTasks_end_task(TimeTasks::MOMENT_ACCUMULATION); + + // reduction + if(!thread_num) timeTasks_begin_task(TimeTasks::MOMENT_REDUCTION); + + // reduce moments in parallel + // + for(int thread_num=0;thread_numsetZeroPrimaryMoments(); convertParticlesToSoA(); EMf->sumMoments(part, grid, vct); + //convertParticlesToAoS(); + //EMf->sumMoments_AoS(part, grid, vct); } //for (int i = 0; i < ns; i++) //{ @@ -265,12 +267,13 @@ bool c_Solver::ParticlesMover() { if(Parameters::get_VECTORIZE_MOVER()) { part[i].mover_PC_vectorized(grid, vct, EMf); + //part[i].mover_PC_AoS_XeonVec(grid, vct, EMf); } else { - //part[i].mover_PC(grid, vct, EMf); + part[i].mover_PC(grid, vct, EMf); + //part[i].mover_PC_AoS(grid, vct, EMf); //part[i].mover_PC_AoS2(grid, vct, EMf); - part[i].mover_PC_AoS(grid, vct, EMf); } } } From 9164960b345889329951190eb0dc72a4804fc5e4 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 29 Jan 2014 19:55:52 +0100 Subject: [PATCH 093/118] Fixed compile errors on Xeon 
Phi --- CMakeLists.txt | 116 +++++++++++++++++++------------------------------ 1 file changed, 44 insertions(+), 72 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fa72f8ee..43a72698 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 2.8.8) # compiler set in ../cmake/cmake_template.cmake.XeonPhi -message ("for Xeon Phi:") -message ("cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.XeonPhi") +#message ("for Xeon Phi:") +#message ("cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.XeonPhi") #message ("for Xeon:") #message ("cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.Xeon") # @@ -21,13 +21,26 @@ set(LIBRARY_OUTPUT_PATH lib) # Set compiler flags per system # if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "k1om") ## Xeon Phi - set(CMAKE_CXX_FLAGS "-O3 -openmp -fno-exceptions -vec-report -mmic") + option(IPIC_XEONPHI "ipic xeon phi standard compile flags" on) + if(IPIC_XEONPHI) + set(CMAKE_CXX_FLAGS "-O3 -openmp -fno-exceptions -vec-report -mmic") + else() + set(CMAKE_CXX_FLAGS "-mmic") + endif() + #set(CMAKE_CXX_FLAGS "$(CMAKE_CXX_FLAGS) -mmic") + #set(CMAKE_CXX_FLAGS "-O3 -openmp -fno-exceptions -vec-report -mmic") #set(CMAKE_CXX_FLAGS "-openmp -g -mmic") # set flags for Xeon Phi, totalview elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") ## Xeon - set(CMAKE_CXX_COMPILER "icpc") - set(CMAKE_CXX_FLAGS "-O3 -openmp -fno-exceptions -vec-report") + option(IPIC_XEON "icpc with optimization" on) + if(IPIC_XEON) + # reporting: -g -vec-report + # optimization: -O3 -xHost -fno-exceptions + set(CMAKE_CXX_FLAGS "-openmp -fno-exceptions -O3 -xHost -vec-report") + set(CMAKE_CXX_COMPILER "icpc") + endif() + #set(CMAKE_CXX_FLAGS "-O3 -openmp -g -xHost -fno-exceptions -vec-report") else() - set(CMAKE_CXX_FLAGS "-O3") + #set(CMAKE_CXX_FLAGS "-O3") endif() # @@ -86,7 +99,6 @@ file( communication/*.cpp fields/*.cpp grids/*.cpp - iPIC3D.cpp/*.cpp inputoutput/*.cpp mathlib/*.cpp 
mpidata/*.cpp @@ -99,50 +111,34 @@ file( ) # -# Compilation options +# Macro definitions # -set(IPIC_TESTS_DIR "${CMAKE_BINARY_DIR}/tests" CACHE STRING "Location of the source files for iPic3D") - -option(IPIC_PARALLEL_HDF5 "Output is done using the parallel HDF5 library" OFF) -if(IPIC_PARALLEL_HDF5) - add_definitions(-DPHDF5) -endif() - -option(IPIC_BATSRUS "Compile library with coupling code for BATS-R-US" OFF) -if(IPIC_BATSRUS) +set(TEST_B $ENV{BATSRUS}) +if(DEFINED TEST_B) add_definitions( -DBATSRUS ) -endif() - -option(IPIC_XEONPHI "Compile options for Xeon Phi" OFF) -if(IPIC_XEONPHI) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -endif() - -option(IPIC_TESTS "Set up the code tests" OFF) - -option(IPIC_BUILD_SHARED "Compile shared library" OFF) -if(IPIC_BUILD_SHARED) - set(IPIC_BUILD_TYPE SHARED) -else() - set(IPIC_BUILD_TYPE STATIC) -endif() + message(" WARNING: BATSRUS flag is active.") +else(DEFINED TEST_B) + message(" INFO: BATSRUS is not active.") +endif(DEFINED TEST_B) # # Executable declaration # +# Particle solver add_executable( iPic3D iPic3D.cpp ) + #build iPic as a library also #libiPic3Dlib.so in folder lib add_library( - iPic3Dlib - ${IPIC_BUILD_TYPE} - ${inc_files} + iPic3Dlib #name of the library + SHARED #type of the library + ${inc_files} # stuff to build the library ${src_files} ) @@ -163,40 +159,16 @@ target_link_libraries( iPic3Dlib ) -## ## to save the executable in the folder where the CMakeLists.txt file is, i.e. 
CMAKE_CURRENT_SOURCE_DIR -## #set_target_properties(iPic3D PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -## -## ## debug releases have a _d appended to the executable -## set_target_properties(iPic3D PROPERTIES DEBUG_POSTFIX "_d") -## -## -## message("Which system am I compiling for:") -## message("CMAKE_SYSTEM_PROCESSOR is ${CMAKE_SYSTEM_PROCESSOR}") -## -## message("Compiler & compiler flags:") -## message("CMAKE_CXX_COMPILER is ${CMAKE_CXX_COMPILER}") -## message("CMAKE_CXX_FLAGS is ${CMAKE_CXX_FLAGS}") - -# -# Code testing -# - -if(IPIC_TESTS) - enable_testing() - - add_test(NAME GEM-test - COMMAND ${CMAKE_COMMAND} - -DIPIC_TESTS_DIR=${IPIC_TESTS_DIR} - -DIPIC_SOURCE_DIR=${CMAKE_SOURCE_DIR} - -DIPICEXEC=$ - -DMPIEXEC=${MPIEXEC} - -DMPIEXEC_NUMPROC_FLAG=${MPIEXEC_NUMPROC_FLAG} - -DMPIEXEC_POSTFLAGS=${MPIEXEC_POSTFLAGS} - -DIPIC_TESTS_DIR=${IPIC_TESTS_DIR} - -P ${CMAKE_SOURCE_DIR}/testfiles/CMakeRunTest-GEM.txt) - - add_test(NAME uname-test - COMMAND ${CMAKE_COMMAND} - -P ${CMAKE_SOURCE_DIR}/testfiles/CMakeRunTest-uname.txt) - -endif(IPIC_TESTS) +## to save the executable in the folder where the CMakeLists.txt file is, i.e. 
CMAKE_CURRENT_SOURCE_DIR +#set_target_properties(iPic3D PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +## debug releases have a _d appended to the executable +set_target_properties(iPic3D PROPERTIES DEBUG_POSTFIX "_d") + + +message("Which system am I compiling for:") +message("CMAKE_SYSTEM_PROCESSOR is ${CMAKE_SYSTEM_PROCESSOR}") + +message("Compiler & compiler flags:") +message("CMAKE_CXX_COMPILER is ${CMAKE_CXX_COMPILER}") +message("CMAKE_CXX_FLAGS is ${CMAKE_CXX_FLAGS}") From 6b150e824f546de5aa78bcfea2c3b8796813fc7a Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 30 Jan 2014 00:18:11 +0100 Subject: [PATCH 094/118] Implemented support for 'ipic run' and 'ipic show' commands --- scripts/ipic.py | 351 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 296 insertions(+), 55 deletions(-) diff --git a/scripts/ipic.py b/scripts/ipic.py index 1c9d90ad..069ed895 100755 --- a/scripts/ipic.py +++ b/scripts/ipic.py @@ -1,78 +1,278 @@ #!/usr/bin/env python +import os import sys +import subprocess +import socket # gethostname() +import re # regular expression +#from optparse import OptionParser import getopt # http://docs.python.org/2/library/collections.html#collections.deque from collections import deque # double-ended queue -import os -#from optparse import OptionParser - +import inspect +# # useful documentation: # # http://effbot.org/zone/python-list.htm # http://pymotw.com/2/subprocess/ # http://stackoverflow.com/questions/3777301/how-to-call-a-shell-script-from-python-code +def getdims(inputfile): + # extract dimensions from intput file + dims = [1, 1, 1] + pattern = re.compile(r'^\s*([\w]+)\s*=\s*([\w]+)') + f = open(inputfile) + for line in f: + # key, value = line.split('=') + #pattern.findall(line) + match = re.search(pattern, line) + if match: + var = match.group(1) + val = match.group(2) + if var == 'XLEN': + dims[0]=int(val) + elif var == 'YLEN': + dims[1] = int(val) + elif var == 'ZLEN': + dims[2] = int(val) + return dims 
+ f.close() + +def lineno(): + """Returns the current line number in our program.""" + return inspect.currentframe().f_back.f_lineno + +def issue_command(command): + if(show): + print ' '.join(command) + else: + print '+', ' '.join(command) + subprocess.call(command); + +def issue_shell_command(command): + if(show): + print command + else: + print '+', command + os.system(command) + +def construct_run_command(args): + + # convert from deque to list for getopts + args = list(args) + + # set default values + num_max_threads = 1 + output = 'data' + inputfile = 'src/inputfiles/GEM.inp' + hostname = '' + mpirun = 'mpiexec' + global system + if system == 'xeon': + mpirun = 'mpiexec.hydra' # is this line needed? + num_max_threads = 4 + elif system == 'mic': + mpirun = 'mpiexec.hydra' + # this should be user configurable + num_max_threads = 50 + hostname = socket.gethostname() + micnum = 0 + hostname = hostname + '-mic' + str(micnum) + + try: + opts, args = getopt.getopt(args, 'i:o:s:t:h:', \ + ['input=', 'output=', 'system=', 'threads=', 'host=']) + except getopt.GetoptError, e: + if e.opt == 'h' and 'requires argument' in e.msg: + print 'ERROR: -h requires input filename' + elif e.opt == 'i' and 'requires argument' in e.msg: + print 'ERROR: -i requires input filename' + elif e.opt == 'o' and 'requires argument' in e.msg: + print 'ERROR: -o requires directory name' + elif e.opt == 't' and 'requires argument' in e.msg: + print 'ERROR: -t requires max number of threads' + elif e.opt == 's' and 'requires argument' in e.msg: + print 'ERROR: -s requires system name (e.g. 
"mic" or "xeon")' + else: + usage() + sys.exit(-1) + + for o, a in opts: + if o in ("-h", "--host"): + hostname = a + elif o in ("-i", "--input"): + inputfile = a + elif o in ("-o", "--output"): + output = a + print 'ERROR: -o is not yet supported' + sys.exit(1) + elif o in ("-t", "--threads"): + num_max_threads = int(a) + elif o in ("-s", "--system"): + system = a + #else: + # assert False, "unhandled option" + + if len(args)!=0: + usage(); + + # determine num_procs + dims = getdims(inputfile) + XLEN = dims[0] + YLEN = dims[1] + ZLEN = dims[2] + num_procs = XLEN*YLEN*ZLEN + # num_procs = 4 + + arguments = ['exec/iPic3D', inputfile]; + options = ['-n', str(num_procs)] + if hostname!="": + options.extend(['-host', hostname]) + + if num_max_threads > 1: + omp_string = 'OMP_NUM_THREADS=' + str(num_max_threads) + omp = ['-env', omp_string] + options.extend(omp) + + command = [mpirun] + command.extend(options) + command.extend(arguments) + return command + +def ipic_run(args): + command = construct_run_command(args); + issue_command(command) + +def ipic_show_run(args): + command = construct_run_command(args); + print ' '.join(command); + +def ipic_make_data(): + # create data subdirectory + create_data_command = '''mkdir -p data'''; + issue_shell_command(create_data_command) + +def ipic_cmake(args): + + # make src a link to the code + numargs = len(args) + if numargs==0: + sourcedir = '..' 
+ elif numargs==1: + sourcedir = deque.popleft(args) + else: + usage() + sys.exit() + + if sourcedir!='src': + rm_command = ['rm -f', 'src']; + issue_command(rm_command); + ln_command = ['ln', '-s', str(sourcedir), 'src']; + issue_command(ln_command) + + ipic_make_data(); + # invoke cmake + cmake_command = ['cmake']; + if system == 'general': + 0 + elif system == 'mic': + cmake_command.extend(['-DCMAKE_TOOLCHAIN_FILE=src/cmake/cmake_template.cmake.XeonPhi']) + else: + print "--system", system, "is not supported" + sys.exit(-1) + # issue the command + cmake_command.extend(['src']) + issue_command(cmake_command) + def ipic_ctags(args): # create tags file using ctags create_tags_command = \ '''find . -name '*.cpp' -or -name '*.h' | grep -v unused | xargs ctags --extra=+qf''' - print create_tags_command - os.system(create_tags_command) + issue_shell_command(create_tags_command) # sort tags file sort_tags_command = '''LC_ALL=C sort -u tags -o tags''' - print sort_tags_command - os.system(sort_tags_command) + issue_shell_command(sort_tags_command) -def ipic_help(): +def ipic_show(args): + if len(args) == 0: + ipic_help_show(args) + sys.exit() + + command = deque.popleft(args) + if command == "run": + ipic_show_run(args) + #elif command == "cmake": + # ipic_show_cmake(args) + #elif command == "ctags": + # ipic_show_ctags(args) + else: + print "ipic show", command, "is not supported" + sys.exit(-1) + +def ipic_basic_help(): print ''' - To build, in the iPic3D directory you can use: + To build, you can use: - rm -rf build # if necessary mkdir build cd build - cmake .. + ''', progname, '''cmake /path/to/ipic3d make # or "make -j" to compile in parallel - To run the code you can use - - mkdir data - mpiexec -n 4 exec/iPic3D ../inputfiles/GEM.inp + Then to run the code, use: - where 4 = XLEN times YLEN times ZLEN (defined in GEM.inp). + ipic run + + If you prefer, use e.g. 
"ipic show run" to see the shell commands + that will be executed and then execute them directly yourself. Available subcommands: - ''', progname, '''help ctags - ''', progname, '''help mic - ''', progname, '''help deep + ''', progname, '''help show # show what a command would do + ''', progname, '''help run # execute iPic3D + ''', progname, '''help cmake # execute cmake and create subdirectories + ''', progname, '''help ctags # create ctags file to navigate code + ''', progname, '''help mic # help for running on mic + ''', progname, '''help deep # help for running on deep ''' +def ipic_help_show(args): + print ''' + ''', progname, '''show [command] + + show the shell command that would be executed by + ipic [command] + ''' + +def ipic_help_run(args): + print ''' + ''', progname, '''[-s ] run [options] + + run iPic3D with appropriate arguments. + + options: + -t : set maximum number of threads + (default is 1 unless -s is set) + -i : set input file (default is "src/inputfiles/GEM.inp") + -o : set output directory (default is "data") + -h : spawn processes on specified host + ''' + def ipic_help_mic(args): print ''' See "ipic help". Modifications are as follows. - To run on the Xeon host processor, use something like: + On the Xeon host processor, use: - mpiexec.hydra -n 8 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp + ipic -s xeon [command] - where 8 = XLEN times YLEN times ZLEN. - - If you want to cross-compile for the MIC, then the instructions are - different: - - mkdir build.phi - cd build.phi - cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cmake_template.cmake.XeonPhi - make -j - - And to run you use, e.g.: - - mkdir data - mpiexec.hydra -host knc2-mic0 -n 50 -env OMP_NUM_THREADS=4 exec/iPic3D ../inputfiles/GEM.inp - - where 50 = XLEN times YLEN times ZLEN. 
+ On the MIC, use + + ipic -s mic [command] + To show what a command will do, use e.g.: + + ipic show -s mic [command] + See also: ''', progname, '''help deep ''' @@ -89,6 +289,14 @@ def ipic_help_deep(args): ''', progname, '''help mic ''' +def ipic_help_cmake(args): + print ''' + ''', progname, '''[-s mic] cmake [sourcedir] + + [sourcedir]: the source code directory; by default ".." + [-s mic]: cross-compile for the mic system + ''' + def ipic_help_ctags(args): print ''' Make sure that you are in the source code directory @@ -164,16 +372,22 @@ def ipic_help_git(args): undo-commit = reset --soft HEAD~1 ''' -def help(args): +def ipic_help(args): if len(args) == 0: - ipic_help() + ipic_basic_help() sys.exit() command = deque.popleft(args) - if command == "mic": + if command == "show": + ipic_help_show(args) + elif command == "run": + ipic_help_run(args) + elif command == "mic": ipic_help_mic(args) elif command == "deep": ipic_help_deep(args) + elif command == "cmake": + ipic_help_cmake(args) elif command == "ctags": ipic_help_ctags(args) elif command == "git": @@ -183,20 +397,23 @@ def help(args): sys.exit(-1) def usage(): + theline = inspect.currentframe().f_back.f_lineno + print ' usage() called from ipic.py line ', str(theline) + print ''' - usage: ''', progname, ''' [options] + usage: ''', progname, ''' [show] Available commands: - ''', progname, '''ctags ''', progname, '''help + ''', progname, '''show + ''', progname, '''cmake + ''', progname, '''ctags ''' -def main(): +def ipic_command(argv1): - global progname - progname = os.path.basename(sys.argv[0]) - global dirname - dirname = os.path.dirname(sys.argv[0]) + global system + system = 'general' # it might be better to use the argparse module rather than getopt, # but unfortunately argparse is only available beginning with python 2.7 @@ -206,10 +423,10 @@ def main(): # before giving up on backward compatibility. 
# try: - opts, args = getopt.getopt(sys.argv[1:], 'ho:', ['help', 'output=']) + opts, args = getopt.getopt(argv1, 'hs:', ['help', 'system=']) except getopt.GetoptError, e: - if e.opt == 'o' and 'requires argument' in e.msg: - print 'ERROR: -o requires filename' + if e.opt == 's' and 'requires argument' in e.msg: + print 'ERROR: -s requires system name (e.g. "mic" or "xeon")' else: usage() sys.exit(-1) @@ -218,8 +435,8 @@ def main(): if o in ("-h", "--help"): usage() sys.exit() - elif o in ("-o", "--output"): - output = a + elif o in ("-s", "--system"): + system = a #else: # assert False, "unhandled option" @@ -234,17 +451,41 @@ def main(): #print list(args) if command == "help": - help(args) + ipic_help(args) + # elif command == "show": + # ipic_show(args) elif command == "ctags": ipic_ctags(args) - #print "ctags not yet implemented" + elif command == "cmake": + ipic_cmake(args) + elif command == "run": + ipic_run(args) else: - print progname, command, "not supported" + print progname, command, "is not supported" sys.exit(-1) #print os.path.basename(__file__) #print os.path.dirname(__file__) +def main(): + + global progname + progname = os.path.basename(sys.argv[0]) + global dirname + dirname = os.path.dirname(sys.argv[0]) + global show + show=0 + + argv1 = sys.argv[1:] + if len(argv1)==0: + usage() + + if argv1[0]=='show': + show=1 + argv1=argv1[1:] + + ipic_command(argv1) + if __name__ == '__main__': main() From 9f0425123547fd6c19a7934e76fce65ad979c82d Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 30 Jan 2014 14:52:32 +0100 Subject: [PATCH 095/118] ipic run: calculate number of threads based on number of processes --- scripts/ipic.py | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/scripts/ipic.py b/scripts/ipic.py index 069ed895..92b2bffc 100755 --- a/scripts/ipic.py +++ b/scripts/ipic.py @@ -62,23 +62,46 @@ def construct_run_command(args): args = list(args) # set default values + num_nodes = 1 
num_max_threads = 1 + num_threads_per_node = 1 output = 'data' inputfile = 'src/inputfiles/GEM.inp' hostname = '' mpirun = 'mpiexec' global system - if system == 'xeon': + if system == 'xeon' or system == 'mic': + if system == 'xeon': mpirun = 'mpiexec.hydra' # is this line needed? - num_max_threads = 4 - elif system == 'mic': + # calculate number of threads per process + # - should extract this stuff from /proc/cpuinfo + num_nodes = 1 + num_processors_per_node = 2 + num_cores_per_processor = 8 + num_threads_per_core = 2 + num_threads_per_node = ( + num_threads_per_core * + num_cores_per_processor * + num_processors_per_node) + elif system == 'mic': mpirun = 'mpiexec.hydra' - # this should be user configurable - num_max_threads = 50 + # calculate number of threads per process + # - could use ssh to extract this stuff from /proc/cpuinfo + # on the machine we will run on + num_nodes = 1 + num_processors_per_node = 1 + num_cores_per_processor = 57 # 57 on knc2, 60 on knc1 + num_threads_per_core = 4 + num_threads_per_node = ( + num_threads_per_core * + num_cores_per_processor * + num_processors_per_node) + # hostname = socket.gethostname() micnum = 0 hostname = hostname + '-mic' + str(micnum) + num_threads_is_given_by_user = 0 try: opts, args = getopt.getopt(args, 'i:o:s:t:h:', \ ['input=', 'output=', 'system=', 'threads=', 'host=']) @@ -107,6 +130,7 @@ def construct_run_command(args): print 'ERROR: -o is not yet supported' sys.exit(1) elif o in ("-t", "--threads"): + num_threads_is_given_by_user = 1 num_max_threads = int(a) elif o in ("-s", "--system"): system = a @@ -122,7 +146,12 @@ def construct_run_command(args): YLEN = dims[1] ZLEN = dims[2] num_procs = XLEN*YLEN*ZLEN - # num_procs = 4 + num_procs_per_node = num_procs/num_nodes + num_threads_per_proc = num_threads_per_node/num_procs_per_node + + if not num_threads_is_given_by_user: + # rounding down is the correct behavior + num_max_threads = int(num_threads_per_proc) arguments = ['exec/iPic3D', inputfile]; 
options = ['-n', str(num_procs)] From ed23f3e0c8076a42a498d1e62ec0538c4a2f08e2 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Thu, 23 Jan 2014 18:17:42 +0100 Subject: [PATCH 096/118] regularized Parameters options for mover_PC --- fields/EMfields3D.cpp | 209 +++++++++++++++++++++++++++ include/EMfields3D.h | 1 + include/Parameters.h | 19 ++- include/Particles3Dcomm.h | 68 ++++++--- include/iPic3D.h | 2 + main/Parameters.cpp | 39 +++-- main/iPic3Dlib.cpp | 51 +++---- particles/Particles3D.cpp | 8 ++ particles/Particles3Dcomm.cpp | 258 +++++++++++++++++++++++++--------- 9 files changed, 532 insertions(+), 123 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 38f544c4..779c2948 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -1155,6 +1155,215 @@ void EMfields3D::sumMoments_vectorized( } } +void EMfields3D::sumMoments_vectorized_AoS( + const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct) +{ + const double inv_dx = grid->get_invdx(); + const double inv_dy = grid->get_invdy(); + const double inv_dz = grid->get_invdz(); + const int nxn = grid->getNXN(); + const int nyn = grid->getNYN(); + const int nzn = grid->getNZN(); + const double xstart = grid->getXstart(); + const double ystart = grid->getYstart(); + const double zstart = grid->getZstart(); + #pragma omp parallel + { + for (int species_idx = 0; species_idx < ns; species_idx++) + { + const Particles3Dcomm& pcls = part[species_idx]; + assert_eq(pcls.get_particleType(), ParticleType::AoS); + const int is = pcls.get_ns(); + assert_eq(species_idx,is); + + const int nop = pcls.getNOP(); + #pragma omp master + { timeTasks_begin_task(TimeTasks::MOMENT_ACCUMULATION); } + Moments10& speciesMoments10 = fetch_moments10Array(0); + arr4_double moments = speciesMoments10.fetch_arr(); + // + // moments.setmode(ompmode::ompfor); + //moments.setall(0.); + double *moments1d = &moments[0][0][0][0]; + int moments1dsize = moments.get_size(); + #pragma omp for // because shared + 
for(int i=0; igetCartesian_rank() == 0) diff --git a/include/EMfields3D.h b/include/EMfields3D.h index 32599b9f..eb261311 100644 --- a/include/EMfields3D.h +++ b/include/EMfields3D.h @@ -127,6 +127,7 @@ class EMfields3D // :public Field void sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); void sumMoments_AoS(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); void sumMoments_vectorized(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); + void sumMoments_vectorized_AoS(const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct); void sumMomentsOld(const Particles3Dcomm& pcls, Grid * grid, VirtualTopology3D * vct); /*! add accumulated moments to the moments for a given species */ //void addToSpeciesMoments(const TenMoments & in, int is); diff --git a/include/Parameters.h b/include/Parameters.h index ba4e980d..84f6f00e 100644 --- a/include/Parameters.h +++ b/include/Parameters.h @@ -5,11 +5,26 @@ // namespace Parameters { + enum MoverType + { + SoA=0, + AoS, + SoAvec_onesort, + AoSvec_onesort, + SoAvec_resort, + AoSvec_resort, + }; + void init_parameters(); + bool get_USING_AOS(); + bool get_SORTING_SOA(); bool get_SORTING_PARTICLES(); + // for resorting particles with each iteration of mover + bool get_RESORTING_PARTICLES(); + inline bool get_USING_XAVG() { return get_RESORTING_PARTICLES(); } bool get_VECTORIZE_MOMENTS(); - bool get_VECTORIZE_MOVER(); - bool get_USING_XAVG(); + //bool get_VECTORIZE_MOVER(); + MoverType get_MOVER_TYPE(); } #endif diff --git a/include/Particles3Dcomm.h b/include/Particles3Dcomm.h index 79e3fb0e..c1e72289 100644 --- a/include/Particles3Dcomm.h +++ b/include/Particles3Dcomm.h @@ -73,6 +73,7 @@ class Particles3Dcomm // :public Particles /*! sort particles for vectorized push (needs to be parallelized) */ void sort_particles_serial(Grid * grid, VirtualTopology3D * vct); + void sort_particles_serial_AoS(Grid * grid, VirtualTopology3D * vct); /*! 
sort particles with respect to provided position data */ void sort_particles_serial( pfloat *xpos, pfloat *ypos, pfloat *zpos, @@ -118,10 +119,29 @@ class Particles3Dcomm // :public Particles assert_le(cz,nzc); } + // get accessors for optional arrays + // + SpeciesParticle *fetch_pcls(){ return _pcls; } + SpeciesParticle *fetch_pclstmp(){ return _pclstmp; } + double * fetch_xavg() { return _xavg; } + double * fetch_yavg() { return _yavg; } + double * fetch_zavg() { return _zavg; } + double * fetch_xtmp() { return _xtmp; } + double * fetch_ytmp() { return _ytmp; } + double * fetch_ztmp() { return _ztmp; } + double * fetch_utmp() { return _utmp; } + double * fetch_vtmp() { return _vtmp; } + double * fetch_wtmp() { return _wtmp; } + double * fetch_qtmp() { return _qtmp; } + double * fetch_xavgtmp() { return _xavgtmp; } + double * fetch_yavgtmp() { return _yavgtmp; } + double * fetch_zavgtmp() { return _zavgtmp; } + long long *fetch_ParticleIDtmp(){ return _ParticleIDtmp; } + // inline get accessors // ParticleType::Type get_particleType()const { return particleType; } - const SpeciesParticle& get_pcl(int pidx)const{ return pcls[pidx]; } + const SpeciesParticle& get_pcl(int pidx)const{ return _pcls[pidx]; } double *getXall() const { return (x); } double *getYall() const { return (y); } double *getZall() const { return (z); } @@ -202,8 +222,6 @@ class Particles3Dcomm // :public Particles ParticleType::Type particleType; // particles data // - // AoS representation - SpeciesParticle *pcls; // SoA representation // /** Positions array - X component */ @@ -224,30 +242,34 @@ class Particles3Dcomm // :public Particles bool TrackParticleID; /** ParticleID */ long long *ParticleID; - /** Average position data (used during particle push) **/ - double *xavg; - double *yavg; - double *zavg; + // + // AoS representation + // + SpeciesParticle *_pcls; // structures for sorting particles // - // alternate storage for sorting particles + /** Average position data (used during 
particle push) **/ + // + double *_xavg; + double *_yavg; + double *_zavg; + // + // alternate temporary storage for sorting particles + // + long long *_ParticleIDtmp; + double *_xtmp; + double *_ytmp; + double *_ztmp; + double *_utmp; + double *_vtmp; + double *_wtmp; + double *_qtmp; + SpeciesParticle *_pclstmp; + double *_xavgtmp; + double *_yavgtmp; + double *_zavgtmp; // - double *xtmp; - double *ytmp; - double *ztmp; - double *utmp; - double *vtmp; - double *wtmp; - double *qtmp; - long long *ParticleIDtmp; - double *xavgtmp; - double *yavgtmp; - double *zavgtmp; - //int *xcell; - //int *ycell; - //int *zcell; - // references for buckets // array3_int* numpcls_in_bucket; diff --git a/include/iPic3D.h b/include/iPic3D.h index 6d7d1063..02e7e604 100644 --- a/include/iPic3D.h +++ b/include/iPic3D.h @@ -49,6 +49,8 @@ namespace iPic3D { void convertParticlesToSoA(); void convertParticlesToAoS(); + private: + void sortParticles(); private: static MPIdata * mpi; diff --git a/main/Parameters.cpp b/main/Parameters.cpp index 00d99b7a..e220e387 100644 --- a/main/Parameters.cpp +++ b/main/Parameters.cpp @@ -2,17 +2,40 @@ using namespace Parameters; +//********** edit these parameters ********* +// +bool Parameters::get_VECTORIZE_MOMENTS() { return false; } +// options: SoA AoS SoAvec_onesort AoSvec_onesort SoAvec_resort AoSvec_resort +Parameters::MoverType Parameters::get_MOVER_TYPE() { return SoA; } + +//********** derived parameters ********* + static bool SORTING_PARTICLES; +static bool RESORTING_PARTICLES; +static bool USING_AOS; +static bool SORTING_SOA; void Parameters::init_parameters() { - SORTING_PARTICLES = get_VECTORIZE_MOMENTS() || get_VECTORIZE_MOVER(); + RESORTING_PARTICLES = + get_MOVER_TYPE()==SoAvec_resort + || get_MOVER_TYPE()==AoSvec_resort; + SORTING_PARTICLES = get_VECTORIZE_MOMENTS() + || get_MOVER_TYPE()==SoAvec_onesort + || get_MOVER_TYPE()==AoSvec_onesort + || get_MOVER_TYPE()==SoAvec_resort + || get_MOVER_TYPE()==AoSvec_resort; + USING_AOS = + 
get_MOVER_TYPE()==AoS + || get_MOVER_TYPE()==AoSvec_onesort + || get_MOVER_TYPE()==AoSvec_resort; + SORTING_SOA = get_VECTORIZE_MOMENTS() + || get_MOVER_TYPE()==SoAvec_onesort + || get_MOVER_TYPE()==SoAvec_resort; } -//bool Parameters::get_SORTING_PARTICLES() { return SORTING_PARTICLES; } -bool Parameters::get_SORTING_PARTICLES() { return true; } -bool Parameters::get_VECTORIZE_MOMENTS() { return false; } -bool Parameters::get_VECTORIZE_MOVER() { return false; } -// this must also return true if we communicate particles per iteration -//bool Parameters::get_USING_XAVG() { return get_VECTORIZE_MOVER(); } -bool Parameters::get_USING_XAVG() { return get_SORTING_PARTICLES(); } +bool Parameters::get_RESORTING_PARTICLES() { return RESORTING_PARTICLES; } +bool Parameters::get_SORTING_PARTICLES() { return SORTING_PARTICLES; } +bool Parameters::get_SORTING_SOA() { return SORTING_SOA; } +bool Parameters::get_USING_AOS() { return USING_AOS; } +// diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 6676b8e4..3f7115c7 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -176,36 +176,35 @@ int c_Solver::Init(int argc, char **argv) { return 0; } -void c_Solver::CalculateMoments() { +void c_Solver::sortParticles() { + timeTasks_begin_task(TimeTasks::MOMENT_PCL_SORTING); + for(int species_idx=0; species_idxupdateInfoFields(grid,vct,col); - if(Parameters::get_SORTING_PARTICLES()) - { - // sort particles - //#pragma omp master - { - //dprint(omp_get_thread_num()); - timeTasks_begin_task(TimeTasks::MOMENT_PCL_SORTING); - for(int species_idx=0; species_idxsumMoments_vectorized(part, grid, vct); + //convertParticlesToAoS(); + //sortParticles(); + //EMf->sumMoments_vectorized_AoS(part, grid, vct); } else { + if(Parameters::get_SORTING_PARTICLES()) + sortParticles(); + EMf->setZeroPrimaryMoments(); convertParticlesToSoA(); EMf->sumMoments(part, grid, vct); @@ -264,16 +263,20 @@ bool c_Solver::ParticlesMover() { // // should merely pass EMf->get_fieldForPcls() rather than 
EMf. // use the Predictor Corrector scheme to move particles - if(Parameters::get_VECTORIZE_MOVER()) - { - part[i].mover_PC_vectorized(grid, vct, EMf); - //part[i].mover_PC_AoS_XeonVec(grid, vct, EMf); - } - else + switch(Parameters::get_MOVER_TYPE()) { - part[i].mover_PC(grid, vct, EMf); - //part[i].mover_PC_AoS(grid, vct, EMf); - //part[i].mover_PC_AoS2(grid, vct, EMf); + case Parameters::SoA: + part[i].mover_PC(grid, vct, EMf); + break; + case Parameters::SoAvec_resort: + part[i].mover_PC_vectorized(grid, vct, EMf); + break; + case Parameters::AoS: + part[i].mover_PC_AoS(grid, vct, EMf); + //part[i].mover_PC_AoS2(grid, vct, EMf); + break; + default: + unsupported_value_error(Parameters::get_MOVER_TYPE()); } } } diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 1d1f567b..3a4a2e80 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -460,6 +460,7 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { void Particles3D::mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EMf) { convertParticlesToAoS(); + SpeciesParticle * pcls = fetch_pcls(); #pragma omp master if (vct->getCartesian_rank() == 0) { cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; @@ -611,6 +612,7 @@ void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf } const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + SpeciesParticle * pcls = fetch_pcls(); #pragma omp master { timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); } const double dto2 = .5 * dt, qdto2mc = qom * dto2 / c; @@ -765,6 +767,9 @@ void Particles3D::mover_PC_vectorized( Grid * grid, VirtualTopology3D * vct, Field * EMf) { convertParticlesToSoA(); + double* xavg = fetch_xavg(); + double* yavg = fetch_yavg(); + double* zavg = fetch_zavg(); assert_eq(nxc,nxn-1); assert_eq(nyc,nyn-1); assert_eq(nzc,nzn-1); @@ -840,6 +845,9 @@ void Particles3D::mover_PC_vectorized( ALIGNED(u); ALIGNED(v); 
ALIGNED(w); + ALIGNED(xavg); + ALIGNED(yavg); + ALIGNED(zavg); // This pragma help on Xeon but hurts on Xeon Phi. // On the Phi we could accelerate by processing two particles at a time. #pragma simd diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 4533266d..f5f49e63 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -58,7 +58,6 @@ Particles3Dcomm::Particles3Dcomm(){ } /** deallocate particles */ Particles3Dcomm::~Particles3Dcomm() { - delete[]pcls; delete[]x; delete[]y; delete[]z; @@ -67,21 +66,26 @@ Particles3Dcomm::~Particles3Dcomm() { delete[]w; delete[]q; delete[]ParticleID; - delete[]xavg; - delete[]yavg; - delete[]zavg; + // AoS representation + delete[]_pcls; + // average position used in particle advance + delete[]_xavg; + delete[]_yavg; + delete[]_zavg; // deallocate alternate storage - delete[]xtmp; - delete[]ytmp; - delete[]ztmp; - delete[]utmp; - delete[]vtmp; - delete[]wtmp; - delete[]qtmp; - delete[]ParticleIDtmp; - delete[]xavgtmp; - delete[]yavgtmp; - delete[]zavgtmp; + delete[]_xtmp; + delete[]_ytmp; + delete[]_ztmp; + delete[]_utmp; + delete[]_vtmp; + delete[]_wtmp; + delete[]_qtmp; + delete[]_ParticleIDtmp; + delete[] _pclstmp; + // extra xavg for sort + delete[]_xavgtmp; + delete[]_yavgtmp; + delete[]_zavgtmp; // deallocate buffers delete[]b_X_RIGHT; delete[]b_X_LEFT; @@ -190,11 +194,11 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 // intel new allocates with 64-byte alignment // since particles are 64 bytes wide, every particle // is aligned. 
- pcls = new SpeciesParticle[npmax]; + _pcls = new SpeciesParticle[npmax]; particleType = ParticleType::SoA; #ifdef __INTEL_COMPILER assert_eq(sizeof(SpeciesParticle),64); - ALIGNED(pcls); + ALIGNED(_pcls); #endif // // SoA particle representation @@ -210,50 +214,66 @@ void Particles3Dcomm::allocate(int species, CollectiveIO * col, VirtualTopology3 // charge q = new double[npmax]; // average positions, used in iterative particle advance - xavg = 0; - yavg = 0; - zavg = 0; + _xavg = 0; + _yavg = 0; + _zavg = 0; + //if(Parameters::get_USING_XAVG()) + //{ + // xavg = new double[npmax]; + // yavg = new double[npmax]; + // zavg = new double[npmax]; + //} + _xtmp = 0; + _ytmp = 0; + _ztmp = 0; + _utmp = 0; + _vtmp = 0; + _wtmp = 0; + _qtmp = 0; + _xavgtmp = 0; + _yavgtmp = 0; + _zavgtmp = 0; + _pcls = 0; + _pclstmp = 0; + // accessors for data that should be allocated only if needed + // if(Parameters::get_USING_XAVG()) { - xavg = new double[npmax]; - yavg = new double[npmax]; - zavg = new double[npmax]; + _xavg=new double[npmax]; + _yavg=new double[npmax]; + _zavg=new double[npmax]; } - // - xtmp = 0; - ytmp = 0; - ztmp = 0; - utmp = 0; - vtmp = 0; - wtmp = 0; - qtmp = 0; - xavgtmp = 0; - yavgtmp = 0; - zavgtmp = 0; - if(Parameters::get_SORTING_PARTICLES()) + if(Parameters::get_SORTING_SOA()) + { + _xtmp=new double[npmax]; + _ytmp=new double[npmax]; + _ztmp=new double[npmax]; + _utmp=new double[npmax]; + _vtmp=new double[npmax]; + _wtmp=new double[npmax]; + _qtmp=new double[npmax]; + _xavgtmp=new double[npmax]; + _yavgtmp=new double[npmax]; + _zavgtmp=new double[npmax]; + if(TrackParticleID) + { + _ParticleIDtmp = new long long[npmax]; + } + } + if(Parameters::get_USING_AOS()) { - xtmp = new double[npmax]; - ytmp = new double[npmax]; - ztmp = new double[npmax]; - // velocities - utmp = new double[npmax]; - vtmp = new double[npmax]; - wtmp = new double[npmax]; - // charge - qtmp = new double[npmax]; - // average positions, used in iterative particle advance - xavgtmp = 
new double[npmax]; - yavgtmp = new double[npmax]; - zavgtmp = new double[npmax]; + assert_eq(sizeof(SpeciesParticle),64); + _pcls = AlignedAlloc(SpeciesParticle,npmax); + _pclstmp = AlignedAlloc(SpeciesParticle,npmax); } ParticleID = 0; - ParticleIDtmp = 0; + _ParticleIDtmp = 0; // ID if (TrackParticleID) { ParticleID = new long long[npmax]; - if(Parameters::get_SORTING_PARTICLES()) - ParticleIDtmp = new long long[npmax]; + //if(Parameters::get_SORTING_PARTICLES()) + // _ParticleIDtmp = new long long[npmax]; BirthRank[0] = vct->getCartesian_rank(); if (vct->getNprocs() > 1) BirthRank[1] = (int) ceil(log10((double) (vct->getNprocs()))); // Number of digits needed for # of process in ID @@ -999,7 +1019,89 @@ void Particles3Dcomm::PrintNp(VirtualTopology3D * ptVCT) const { void Particles3Dcomm::sort_particles_serial(Grid * grid, VirtualTopology3D * vct) { - sort_particles_serial(x,y,z, grid,vct); + switch(particleType) + { + case ParticleType::AoS: + sort_particles_serial_AoS(grid,vct); + break; + case ParticleType::SoA: + sort_particles_serial(x,y,z, grid,vct); + break; + default: + unsupported_value_error(particleType); + } +} + +// need to sort and communicate particles after each iteration +void Particles3Dcomm::sort_particles_serial_AoS( + Grid * grid, VirtualTopology3D * vct) +{ + SpeciesParticle* pcls = fetch_pcls(); + SpeciesParticle* pclstmp = fetch_pclstmp(); + { + numpcls_in_bucket->setall(0); + // iterate through particles and count where they will go + for (int pidx = 0; pidx < nop; pidx++) + { + const SpeciesParticle& pcl = get_pcl(pidx); + // get the cell indices of the particle + int cx,cy,cz; + get_safe_cell_for_pos(cx,cy,cz,pcl.get_x(),pcl.get_y(),pcl.get_z()); + + // increment the number of particles in bucket of this particle + (*numpcls_in_bucket)[cx][cy][cz]++; + } + + // compute prefix sum to determine initial position + // of each bucket (could parallelize this) + // + int accpcls=0; + for(int cx=0;cxsetall(0); + // put the particles where they 
are supposed to go + for (int pidx = 0; pidx < nop; pidx++) + { + const SpeciesParticle& pcl = get_pcl(pidx); + // get the cell indices of the particle + int cx,cy,cz; + get_safe_cell_for_pos(cx,cy,cz,pcl.get_x(),pcl.get_y(),pcl.get_z()); + + // compute where the data should go + const int numpcls_now = (*numpcls_in_bucket_now)[cx][cy][cz]++; + const int outpidx = (*bucket_offset)[cx][cy][cz] + numpcls_now; + assert_lt(outpidx, nop); + assert_ge(outpidx, 0); + assert_lt(pidx, nop); + assert_ge(pidx, 0); + + // copy particle data to new location + // + pclstmp[outpidx] = pcl; + } + // swap the tmp particle memory with the official particle memory + { + swap(_pclstmp,_pcls); + } + + // check if the particles were sorted incorrectly + if(true) + { + for(int cx=0;cxsetall(0); // iterate through particles and count where they will go @@ -1071,24 +1184,35 @@ void Particles3Dcomm::sort_particles_serial( wtmp[outpidx] = w[pidx]; qtmp[outpidx] = q[pidx]; if (TrackParticleID) + { ParticleIDtmp[outpidx] = ParticleID[pidx]; - xavgtmp[outpidx] = xavg[pidx]; - yavgtmp[outpidx] = yavg[pidx]; - zavgtmp[outpidx] = zavg[pidx]; + } + if(_xavg) + { + double* xavg = fetch_xavg(); + double* yavg = fetch_yavg(); + double* zavg = fetch_zavg(); + double* xavgtmp = fetch_xavgtmp(); + double* yavgtmp = fetch_yavgtmp(); + double* zavgtmp = fetch_zavgtmp(); + xavgtmp[outpidx] = xavg[pidx]; + yavgtmp[outpidx] = yavg[pidx]; + zavgtmp[outpidx] = zavg[pidx]; + } } // swap the tmp particle memory with the official particle memory { - swap(xtmp,x); - swap(ytmp,y); - swap(ztmp,z); - swap(utmp,u); - swap(vtmp,v); - swap(wtmp,w); - swap(qtmp,q); - swap(ParticleIDtmp,ParticleID); - swap(xavgtmp,xavg); - swap(yavgtmp,yavg); - swap(zavgtmp,zavg); + swap(_xtmp,x); + swap(_ytmp,y); + swap(_ztmp,z); + swap(_utmp,u); + swap(_vtmp,v); + swap(_wtmp,w); + swap(_qtmp,q); + swap(_ParticleIDtmp,ParticleID); + swap(_xavgtmp,_xavg); + swap(_yavgtmp,_yavg); + swap(_zavgtmp,_zavg); } // check if the particles were 
sorted incorrectly @@ -1239,6 +1363,7 @@ void Particles3Dcomm::copyParticlesToSoA() { timeTasks_set_task(TimeTasks::TRANSPOSE_PCLS_TO_SOA); dprintf("copying to struct of arrays"); + SpeciesParticle const*const pcls = fetch_pcls(); #pragma omp for for(int pidx=0; pidx Date: Sat, 25 Jan 2014 15:23:43 +0100 Subject: [PATCH 097/118] fixed AoS bugs and turned off sorting and vectorization in Parameters.cpp --- fields/EMfields3D.cpp | 21 +-- include/Parameters.h | 13 +- include/Particle.h | 68 +++---- include/Particles3D.h | 1 + include/Particles3Dcomm.h | 6 +- include/ompdefs.h | 22 +++ main/Parameters.cpp | 21 ++- main/iPic3Dlib.cpp | 50 ++++-- particles/Particles3D.cpp | 322 ++++++++++++++++++++++++++-------- particles/Particles3Dcomm.cpp | 285 ++++++++++++++++++++++++++---- 10 files changed, 613 insertions(+), 196 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 779c2948..8034f792 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -425,7 +425,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop // The following loop is expensive, so it is wise to assume that the // compiler is stupid. Therefore we should on the one hand // expand things out and on the other hand avoid repeating computations. 
- #pragma omp for nowait + #pragma omp for // used nowait with the old way for (int i = 0; i < nop; i++) { // compute the quadratic moments of velocity @@ -1158,6 +1158,7 @@ void EMfields3D::sumMoments_vectorized( void EMfields3D::sumMoments_vectorized_AoS( const Particles3Dcomm* part, Grid * grid, VirtualTopology3D * vct) { + dprint("entering") const double inv_dx = grid->get_invdx(); const double inv_dy = grid->get_invdy(); const double inv_dz = grid->get_invdz(); @@ -1233,18 +1234,18 @@ void EMfields3D::sumMoments_vectorized_AoS( memset(momentsAcc,0,sizeof(double)*8*10); for(int pidx=bucket_offset; pidxget_u(); + const double vi=pcl->get_v(); + const double wi=pcl->get_w(); const double uui=ui*ui; const double uvi=ui*vi; const double uwi=ui*wi; @@ -1266,9 +1267,9 @@ void EMfields3D::sumMoments_vectorized_AoS( // compute the weights to distribute the moments // //double weights[8]; - const double abs_xpos = pcl.get_x(); - const double abs_ypos = pcl.get_y(); - const double abs_zpos = pcl.get_z(); + const double abs_xpos = pcl->get_x(); + const double abs_ypos = pcl->get_y(); + const double abs_zpos = pcl->get_z(); const double rel_xpos = abs_xpos - xstart; const double rel_ypos = abs_ypos - ystart; const double rel_zpos = abs_zpos - zstart; @@ -1293,7 +1294,7 @@ void EMfields3D::sumMoments_vectorized_AoS( const double w0y = 1-w1y; const double w0z = 1-w1z; // we are calculating a charge moment. 
- const double qi=pcl.get_q(); + const double qi=pcl->get_q(); const double weight0 = qi*w0x; const double weight1 = qi*w1x; const double weight00 = weight0*w0y; diff --git a/include/Parameters.h b/include/Parameters.h index 84f6f00e..fac04e9e 100644 --- a/include/Parameters.h +++ b/include/Parameters.h @@ -5,10 +5,14 @@ // namespace Parameters { - enum MoverType + enum Enum { - SoA=0, - AoS, + SoA=0, // struct of arrays + AoS, // array of structs + // for moments type + AoSvec, + SoAvec, + // for mover type SoAvec_onesort, AoSvec_onesort, SoAvec_resort, @@ -25,6 +29,7 @@ namespace Parameters inline bool get_USING_XAVG() { return get_RESORTING_PARTICLES(); } bool get_VECTORIZE_MOMENTS(); //bool get_VECTORIZE_MOVER(); - MoverType get_MOVER_TYPE(); + Enum get_MOVER_TYPE(); + Enum get_MOMENTS_TYPE(); } #endif diff --git a/include/Particle.h b/include/Particle.h index a17c5368..660ce9f4 100644 --- a/include/Particle.h +++ b/include/Particle.h @@ -61,55 +61,39 @@ class SpeciesParticle // intended to occupy 64 bytes // -// to be used when sorting with every particle advance -struct CellParticle +// species particle for second-order-accuracy implicit advance +class ISpcl { - long long ID; // 8 bytes - int cx[3]; // mesh cell - float fx[3]; // mesh cell position (fraction) + long long ID; + double x[3]; + float hdx[3]; // xavg = x + hdx float u[3]; - float fxavg[3]; // for implicit push - float q; // float m would be better for stitching to MHD for dusty plasma - float qom; // for dusty plasma + double q; public: // accessors - // - // read access long long get_ID()const{ return ID; } - float get_fx()const{ return fx[0]; } - float get_fy()const{ return fx[1]; } - float get_fz()const{ return fx[2]; } - float get_u()const{ return u[0]; } - float get_v()const{ return u[1]; } - float get_w()const{ return u[2]; } - float get_q()const{ return q; } + double get_x(int i)const{ return x[i]; } + double get_hdx(int i)const{ return hdx[i]; } + float get_u(int i)const{ return u[i]; } + 
double get_q()const{ return q; } void set_ID(long long in){ ID=in; } - // write access - void set_u(float in){ u[0]=in; } - void set_v(float in){ u[1]=in; } - void set_w(float in){ u[2]=in; } - - void init(const SpeciesParticle& pcl, - double cxstart[3], // starting position of cell coordinates - float dx_inv[3], - float _qom) - { - ID = pcl.get_ID(); - // position in mesh coordinates - // + void set_x(int i, double in) { x[i] = in; } + void set_hdx(int i, float in) { hdx[i] = in; } + void set_u(int i, double in) { u[i] = in; } + void set_q(double in) { q = in; } +}; - float xpos[3]; - for(int i=0;i<3;i++) - { - float xpos = (pcl.get_x(i)-cxstart[i])*dx_inv[i]; - float cxpos = floor(xpos); - cx[i] = int(cxpos); - fxavg[i] = fx[i] = cxpos - cx[i]; - u[i] = pcl.get_u(i); - } - q = pcl.get_q(); - qom = _qom; - } +// intended to occupy 64 bytes +// +// dust particle for second-order-accuracy implicit advance +class IDpcl +{ + long long ID; + double x[3]; // could replace with cell index and float x + float hdx[3]; // xavg = x + hdx + float u[3]; + float qom; // charge to mass ratio of particle + float m; // mass of particle }; #endif diff --git a/include/Particles3D.h b/include/Particles3D.h index 05a8701a..58c8fb49 100644 --- a/include/Particles3D.h +++ b/include/Particles3D.h @@ -61,6 +61,7 @@ class Particles3D:public Particles3Dcomm { /** array-of-structs version of mover_PC */ void mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EMf); void mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf); + void mover_PC_AoS_vec(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** vectorized version of mover_PC **/ void mover_PC_vectorized(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** communicate particle after moving them */ diff --git a/include/Particles3Dcomm.h b/include/Particles3Dcomm.h index c1e72289..3299d243 100644 --- a/include/Particles3Dcomm.h +++ b/include/Particles3Dcomm.h @@ -72,12 +72,10 @@ class Particles3Dcomm // :public 
Particles void convertParticlesToSoA(); /*! sort particles for vectorized push (needs to be parallelized) */ + void sort_particles_serial_SoA_by_xavg(Grid * grid, VirtualTopology3D * vct); void sort_particles_serial(Grid * grid, VirtualTopology3D * vct); void sort_particles_serial_AoS(Grid * grid, VirtualTopology3D * vct); - /*! sort particles with respect to provided position data */ - void sort_particles_serial( - pfloat *xpos, pfloat *ypos, pfloat *zpos, - Grid * grid, VirtualTopology3D * vct); + void sort_particles_serial_SoA(Grid * grid, VirtualTopology3D * vct); void get_safe_cell_for_pos( int& cx, int& cy, int& cz, pfloat xpos, pfloat ypos, pfloat zpos) diff --git a/include/ompdefs.h b/include/ompdefs.h index 2c16779f..f3640c36 100644 --- a/include/ompdefs.h +++ b/include/ompdefs.h @@ -9,6 +9,28 @@ #else inline int omp_get_thread_num() { return 0;} inline int omp_get_max_threads(){ return 1;} +#define omp_set_num_threads(num_threads) #endif +class Caller_to_SetMaxThreadsForScope{ + int max_threads; + public: + Caller_to_SetMaxThreadsForScope(int i) + { + max_threads = omp_get_max_threads(); + // omp_set_num_threads should have been + // called omp_set_max_threads + omp_set_num_threads(i); + } + ~Caller_to_SetMaxThreadsForScope() + { + // restore the original maximum number of threads + omp_set_num_threads(max_threads); + } +}; + +#define set_max_threads_for_scope(num_threads) \ + Caller_to_SetMaxThreadsForScope \ + instanceOfCaller_to_SetMaxThreadsForScope(num_threads); + #endif diff --git a/main/Parameters.cpp b/main/Parameters.cpp index e220e387..ca8ed480 100644 --- a/main/Parameters.cpp +++ b/main/Parameters.cpp @@ -5,9 +5,10 @@ using namespace Parameters; //********** edit these parameters ********* // bool Parameters::get_VECTORIZE_MOMENTS() { return false; } -// options: SoA AoS SoAvec_onesort AoSvec_onesort SoAvec_resort AoSvec_resort -Parameters::MoverType Parameters::get_MOVER_TYPE() { return SoA; } - +// supported options: SoA AoS +Parameters::Enum 
Parameters::get_MOMENTS_TYPE() { return SoA; } +// supported options: SoA AoS AoSvec_onesort SoAvec_resort +Parameters::Enum Parameters::get_MOVER_TYPE() { return SoA; } //********** derived parameters ********* static bool SORTING_PARTICLES; @@ -25,17 +26,23 @@ void Parameters::init_parameters() || get_MOVER_TYPE()==AoSvec_onesort || get_MOVER_TYPE()==SoAvec_resort || get_MOVER_TYPE()==AoSvec_resort; - USING_AOS = - get_MOVER_TYPE()==AoS - || get_MOVER_TYPE()==AoSvec_onesort - || get_MOVER_TYPE()==AoSvec_resort; SORTING_SOA = get_VECTORIZE_MOMENTS() || get_MOVER_TYPE()==SoAvec_onesort || get_MOVER_TYPE()==SoAvec_resort; + USING_AOS = + get_MOMENTS_TYPE()==AoS + || get_MOVER_TYPE()==AoS + || get_MOVER_TYPE()==AoSvec_onesort + || get_MOVER_TYPE()==AoSvec_resort; } bool Parameters::get_RESORTING_PARTICLES() { return RESORTING_PARTICLES; } bool Parameters::get_SORTING_PARTICLES() { return SORTING_PARTICLES; } bool Parameters::get_SORTING_SOA() { return SORTING_SOA; } bool Parameters::get_USING_AOS() { return USING_AOS; } + +//bool Parameters::get_RESORTING_PARTICLES() { return true; } +//bool Parameters::get_SORTING_PARTICLES() { return true; } +//bool Parameters::get_SORTING_SOA() { return true; } +//bool Parameters::get_USING_AOS() { return true; } // diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 3f7115c7..9f60f114 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -191,25 +191,42 @@ void c_Solver::CalculateMoments() { if(Parameters::get_VECTORIZE_MOMENTS()) { - // since particles are sorted, - // we can vectorize interpolation of particles to grid - convertParticlesToSoA(); - sortParticles(); - EMf->sumMoments_vectorized(part, grid, vct); - //convertParticlesToAoS(); - //sortParticles(); - //EMf->sumMoments_vectorized_AoS(part, grid, vct); + switch(Parameters::get_MOMENTS_TYPE()) + { + case Parameters::SoA: + // since particles are sorted, + // we can vectorize interpolation of particles to grid + convertParticlesToSoA(); + sortParticles(); + 
EMf->sumMoments_vectorized(part, grid, vct); + break; + case Parameters::AoS: + convertParticlesToAoS(); + sortParticles(); + EMf->sumMoments_vectorized_AoS(part, grid, vct); + break; + default: + unsupported_value_error(Parameters::get_MOMENTS_TYPE()); + } } else { if(Parameters::get_SORTING_PARTICLES()) - sortParticles(); - - EMf->setZeroPrimaryMoments(); - convertParticlesToSoA(); - EMf->sumMoments(part, grid, vct); - //convertParticlesToAoS(); - //EMf->sumMoments_AoS(part, grid, vct); + sortParticles(); + switch(Parameters::get_MOMENTS_TYPE()) + { + case Parameters::SoA: + EMf->setZeroPrimaryMoments(); + convertParticlesToSoA(); + EMf->sumMoments(part, grid, vct); + break; + case Parameters::AoS: + convertParticlesToAoS(); + EMf->sumMoments_AoS(part, grid, vct); + break; + default: + unsupported_value_error(Parameters::get_MOMENTS_TYPE()); + } } //for (int i = 0; i < ns; i++) //{ @@ -275,6 +292,9 @@ bool c_Solver::ParticlesMover() { part[i].mover_PC_AoS(grid, vct, EMf); //part[i].mover_PC_AoS2(grid, vct, EMf); break; + case Parameters::AoSvec_onesort: + part[i].mover_PC_AoS_vec(grid, vct, EMf); + break; default: unsupported_value_error(Parameters::get_MOVER_TYPE()); } diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 3a4a2e80..dd0af36b 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -317,34 +317,34 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { if (vct->getCartesian_rank() == 0) { cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; } - const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + const_arr4_double fieldForPcls = EMf->get_fieldForPcls(); #pragma omp master { timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); } - const pfloat dto2 = .5 * dt, qdto2mc = qom * dto2 / c; + const double dto2 = .5 * dt, qdto2mc = qom * dto2 / c; #pragma omp for schedule(static) // why does single precision make no difference in execution speed? 
//#pragma simd vectorlength(VECTOR_WIDTH) for (int pidx = 0; pidx < nop; pidx++) { // copy the particle - const pfloat xorig = x[pidx]; - const pfloat yorig = y[pidx]; - const pfloat zorig = z[pidx]; - const pfloat uorig = u[pidx]; - const pfloat vorig = v[pidx]; - const pfloat worig = w[pidx]; - pfloat xavg = xorig; - pfloat yavg = yorig; - pfloat zavg = zorig; - pfloat uavg; - pfloat vavg; - pfloat wavg; + const double xorig = x[pidx]; + const double yorig = y[pidx]; + const double zorig = z[pidx]; + const double uorig = u[pidx]; + const double vorig = v[pidx]; + const double worig = w[pidx]; + double xavg = xorig; + double yavg = yorig; + double zavg = zorig; + double uavg; + double vavg; + double wavg; // calculate the average velocity iteratively for (int innter = 0; innter < NiterMover; innter++) { // interpolation G-->P - const pfloat ixd = floor((xavg - xstart) * inv_dx); - const pfloat iyd = floor((yavg - ystart) * inv_dy); - const pfloat izd = floor((zavg - zstart) * inv_dz); + const double ixd = floor((xavg - xstart) * inv_dx); + const double iyd = floor((yavg - ystart) * inv_dy); + const double izd = floor((zavg - zstart) * inv_dz); // interface of index to right of cell int ix = 2 + int(ixd); int iy = 2 + int(iyd); @@ -363,12 +363,12 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { //const int cy = iy - 1; //const int cz = iz - 1; - const pfloat xi0 = xavg - grid->get_pfloat_XN(ix-1); - const pfloat eta0 = yavg - grid->get_pfloat_YN(iy-1); - const pfloat zeta0 = zavg - grid->get_pfloat_ZN(iz-1); - const pfloat xi1 = grid->get_pfloat_XN(ix) - xavg; - const pfloat eta1 = grid->get_pfloat_YN(iy) - yavg; - const pfloat zeta1 = grid->get_pfloat_ZN(iz) - zavg; + const double xi0 = xavg - grid->getXN(ix-1); + const double eta0 = yavg - grid->getYN(iy-1); + const double zeta0 = zavg - grid->getZN(iz-1); + const double xi1 = grid->getXN(ix) - xavg; + const double eta1 = grid->getYN(iy) - yavg; + const double zeta1 = 
grid->getZN(iz) - zavg; pfloat Exl = 0.0; pfloat Eyl = 0.0; @@ -404,7 +404,7 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { // creating these aliases seems to accelerate this method by about 30% // on the Xeon host, processor, suggesting deficiency in the optimizer. // - arr1_pfloat_get field_components[8]; + arr1_double_get field_components[8]; field_components[0] = fieldForPcls[ix ][iy ][iz ]; // field000 field_components[1] = fieldForPcls[ix ][iy ][iz-1]; // field001 field_components[2] = fieldForPcls[ix ][iy-1][iz ]; // field010 @@ -551,7 +551,7 @@ void Particles3D::mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EM // creating these aliases seems to accelerate this method by about 30% // on the Xeon host, processor, suggesting deficiency in the optimizer. // - arr1_pfloat_get field_components[8]; + arr1_double_get field_components[8]; field_components[0] = fieldForPcls[ix ][iy ][iz ]; // field000 field_components[1] = fieldForPcls[ix ][iy ][iz-1]; // field001 field_components[2] = fieldForPcls[ix ][iy-1][iz ]; // field010 @@ -603,6 +603,7 @@ void Particles3D::mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EM #pragma omp master { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } } + void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf) { convertParticlesToAoS(); @@ -621,13 +622,14 @@ void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf //#pragma simd vectorlength(VECTOR_WIDTH) for (int pidx = 0; pidx < nop; pidx++) { // copy the particle - SpeciesParticle& pcl = pcls[pidx]; - const double xorig = pcl.get_x(); - const double yorig = pcl.get_y(); - const double zorig = pcl.get_z(); - const double uorig = pcl.get_u(); - const double vorig = pcl.get_v(); - const double worig = pcl.get_w(); + SpeciesParticle* pcl = &pcls[pidx]; + ALIGNED(pcl); + const double xorig = pcl->get_x(); + const double yorig = pcl->get_y(); + const double zorig = 
pcl->get_z(); + const double uorig = pcl->get_u(); + const double vorig = pcl->get_v(); + const double worig = pcl->get_w(); double xavg = xorig; double yavg = yorig; double zavg = zorig; @@ -709,7 +711,7 @@ void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf // creating these aliases seems to accelerate this method by about 30% // on the Xeon host, processor, suggesting deficiency in the optimizer. // - arr1_pfloat_get field_components[8]; + arr1_double_get field_components[8]; field_components[0] = fieldForPcls[ix][iy][iz]; // field000 field_components[1] = fieldForPcls[ix][iy][cz]; // field001 field_components[2] = fieldForPcls[ix][cy][iz]; // field010 @@ -751,25 +753,189 @@ void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf zavg = zorig + wavg * dto2; } // end of iteration // update the final position and velocity - pcl.set_x(xorig + uavg * dt); - pcl.set_y(yorig + vavg * dt); - pcl.set_z(zorig + wavg * dt); - pcl.set_u(2.0 * uavg - uorig); - pcl.set_v(2.0 * vavg - vorig); - pcl.set_w(2.0 * wavg - worig); + pcl->set_x(xorig + uavg * dt); + pcl->set_y(yorig + vavg * dt); + pcl->set_z(zorig + wavg * dt); + pcl->set_u(2.0 * uavg - uorig); + pcl->set_v(2.0 * vavg - vorig); + pcl->set_w(2.0 * wavg - worig); } // END OF ALL THE PARTICLES #pragma omp master { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } } +// this currently computes garbage but execution time +// suggests bound on performance. 
For correct execution +// would need to sort by xavg with each iteration +// like in mover_PC_vectorized +void Particles3D::mover_PC_AoS_vec( + Grid * grid, VirtualTopology3D * vct, Field * EMf) +{ + convertParticlesToAoS(); + #pragma omp master + if (vct->getCartesian_rank() == 0) { + cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; + } + const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + + SpeciesParticle * pcls = fetch_pcls(); + #pragma omp master + { timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); } + const double dto2 = .5 * dt, qdto2mc = qom * dto2 / c; + + #pragma omp for collapse(2) // schedule(static) + for(int cx=0;cxget_x(); + const pfloat yorig = pcl->get_y(); + const pfloat zorig = pcl->get_z(); + const pfloat uorig = pcl->get_u(); + const pfloat vorig = pcl->get_v(); + const pfloat worig = pcl->get_w(); + double xavg = xorig; + double yavg = yorig; + double zavg = zorig; + double uavg; + double vavg; + double wavg; + // calculate the average velocity iteratively + for (int innter = 0; innter < NiterMover; innter++) { + + // compute weights for field components + // + double weights[8]; + // xstart marks start of domain excluding ghosts + const double rel_xpos = xavg - xstart; + const double rel_ypos = yavg - ystart; + const double rel_zpos = zavg - zstart; + // cell position minus 1 (due to ghost cells) + const double cxm1_pos = rel_xpos * inv_dx; + const double cym1_pos = rel_ypos * inv_dy; + const double czm1_pos = rel_zpos * inv_dz; + // + int cx = 1 + int(floor(cxm1_pos)); + int cy = 1 + int(floor(cym1_pos)); + int cz = 1 + int(floor(czm1_pos)); + + // if the cell is outside the domain, then treat it as + // in the nearest ghost cell. 
+ // + if (cx < 0) cx = 0; + if (cy < 0) cy = 0; + if (cz < 0) cz = 0; + // number of cells in x direction including ghosts is nxc + if (cx >= nxc) cx = nxc-1; + if (cy >= nyc) cy = nyc-1; + if (cz >= nzc) cz = nzc-1; + + // index of interface to right of cell + const int ix = cx + 1; + const int iy = cy + 1; + const int iz = cz + 1; + + // fraction of the distance from the right of the cell + const double w1x = cx - cxm1_pos; + const double w1y = cy - cym1_pos; + const double w1z = cz - czm1_pos; + // fraction of distance from the left + const double w0x = 1-w1x; + const double w0y = 1-w1y; + const double w0z = 1-w1z; + // + weights[0] = w0x*w0y*w0z; // weight000 + weights[1] = w0x*w0y*w1z; // weight001 + weights[2] = w0x*w1y*w0z; // weight010 + weights[3] = w0x*w1y*w1z; // weight011 + weights[4] = w1x*w0y*w0z; // weight100 + weights[5] = w1x*w0y*w1z; // weight101 + weights[6] = w1x*w1y*w0z; // weight110 + weights[7] = w1x*w1y*w1z; // weight111 + + pfloat Exl = 0.0; + pfloat Eyl = 0.0; + pfloat Ezl = 0.0; + pfloat Bxl = 0.0; + pfloat Byl = 0.0; + pfloat Bzl = 0.0; + + for(int c=0; c<8; c++) + { + Bxl += weights[c] * field_components[c][0]; + Byl += weights[c] * field_components[c][1]; + Bzl += weights[c] * field_components[c][2]; + Exl += weights[c] * field_components[c][3]; + Eyl += weights[c] * field_components[c][4]; + Ezl += weights[c] * field_components[c][5]; + } + const double Omx = qdto2mc*Bxl; + const double Omy = qdto2mc*Byl; + const double Omz = qdto2mc*Bzl; + + // end interpolation + const pfloat omsq = (Omx * Omx + Omy * Omy + Omz * Omz); + const pfloat denom = 1.0 / (1.0 + omsq); + // solve the position equation + const pfloat ut = uorig + qdto2mc * Exl; + const pfloat vt = vorig + qdto2mc * Eyl; + const pfloat wt = worig + qdto2mc * Ezl; + //const pfloat udotb = ut * Bxl + vt * Byl + wt * Bzl; + const pfloat udotOm = ut * Omx + vt * Omy + wt * Omz; + // solve the velocity equation + uavg = (ut + (vt * Omz - wt * Omy + udotOm * Omx)) * denom; + vavg 
= (vt + (wt * Omx - ut * Omz + udotOm * Omy)) * denom; + wavg = (wt + (ut * Omy - vt * Omx + udotOm * Omz)) * denom; + // update average position + xavg = xorig + uavg * dto2; + yavg = yorig + vavg * dto2; + zavg = zorig + wavg * dto2; + } + // update the final position and velocity + pcl->set_x(xorig + uavg * dt); + pcl->set_y(yorig + vavg * dt); + pcl->set_z(zorig + wavg * dt); + pcl->set_u(2.0 * uavg - uorig); + pcl->set_v(2.0 * vavg - vorig); + pcl->set_w(2.0 * wavg - worig); + } + } + #pragma omp master + { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } +} + /** mover with a Predictor-Corrector scheme */ void Particles3D::mover_PC_vectorized( Grid * grid, VirtualTopology3D * vct, Field * EMf) { convertParticlesToSoA(); - double* xavg = fetch_xavg(); - double* yavg = fetch_yavg(); - double* zavg = fetch_zavg(); assert_eq(nxc,nxn-1); assert_eq(nyc,nyn-1); assert_eq(nzc,nzn-1); @@ -783,9 +949,9 @@ void Particles3D::mover_PC_vectorized( #pragma omp for schedule(static) for(int pidx = 0; pidx < nop; pidx++) { - xavg[pidx] = x[pidx]; - yavg[pidx] = y[pidx]; - zavg[pidx] = z[pidx]; + _xavg[pidx] = x[pidx]; + _yavg[pidx] = y[pidx]; + _zavg[pidx] = z[pidx]; } const pfloat dto2 = .5 * dt, qdto2mc = qom * dto2 / c; @@ -797,7 +963,8 @@ void Particles3D::mover_PC_vectorized( #pragma omp master { timeTasks_begin_task(TimeTasks::MOVER_PCL_SORTING); - sort_particles_serial(xavg, yavg, zavg, grid,vct); + // this changes the definitions of x,y,z,u,v,w,_xavg,_yavg,_zavg,etc. 
+ sort_particles_serial_SoA_by_xavg(grid,vct); timeTasks_end_task(TimeTasks::MOVER_PCL_SORTING); } #pragma omp barrier @@ -811,6 +978,15 @@ void Particles3D::mover_PC_vectorized( //const int ncells=nxc*nyc*nzc; //int *numpcls_in_bucket_1d = &numpcls_in_bucket[0][0][0]; //int *bucket_offset_1d = &bucket_offset[0][0][0]; + ALIGNED(x); + ALIGNED(y); + ALIGNED(z); + ALIGNED(u); + ALIGNED(v); + ALIGNED(w); + ALIGNED(_xavg); + ALIGNED(_yavg); + ALIGNED(_zavg); int serial_pidx = 0; #pragma omp for collapse(2) // schedule(static) for(int cx=0;cxsetall(0); + // put the particles where they are supposed to go for (int pidx = 0; pidx < nop; pidx++) { // get the cell indices of the particle // int cx,cy,cz; - get_safe_cell_for_pos(cx,cy,cz,xpos[pidx],ypos[pidx],zpos[pidx]); + get_safe_cell_for_pos(cx,cy,cz,x[pidx],y[pidx],z[pidx]); // //cx = xcell[pidx]; //cy = ycell[pidx]; @@ -1187,17 +1192,161 @@ void Particles3Dcomm::sort_particles_serial( { ParticleIDtmp[outpidx] = ParticleID[pidx]; } - if(_xavg) + } + // swap the tmp particle memory with the official particle memory + { + swap(_xtmp,x); + swap(_ytmp,y); + swap(_ztmp,z); + swap(_utmp,u); + swap(_vtmp,v); + swap(_wtmp,w); + swap(_qtmp,q); + swap(_ParticleIDtmp,ParticleID); + } + + // check that the number of bins was correct + // + if(true) + { + for(int cx=0;cxsetall(0); + // iterate through particles and count where they will go + for (int pidx = 0; pidx < nop; pidx++) + { + // get the cell indices of the particle + // + int cx,cy,cz; + get_safe_cell_for_pos(cx,cy,cz,xavg[pidx],yavg[pidx],zavg[pidx]); + // + // is it better just to recompute this? 
+ // + //xcell[pidx]=cx; + //ycell[pidx]=cy; + //zcell[pidx]=cz; + + // increment the number of particles in bucket of this particle + (*numpcls_in_bucket)[cx][cy][cz]++; + } + + // compute prefix sum to determine initial position + // of each bucket (could parallelize this) + // + int accpcls=0; + for(int cx=0;cxsetall(0); + + // put the particles where they are supposed to go + for (int pidx = 0; pidx < nop; pidx++) + { + // get the cell indices of the particle + // + int cx,cy,cz; + get_safe_cell_for_pos(cx,cy,cz,xavg[pidx],yavg[pidx],zavg[pidx]); + // + //cx = xcell[pidx]; + //cy = ycell[pidx]; + //cz = zcell[pidx]; + + // compute where the data should go + const int numpcls_now = (*numpcls_in_bucket_now)[cx][cy][cz]++; + const int outpidx = (*bucket_offset)[cx][cy][cz] + numpcls_now; + assert_lt(outpidx, nop); + assert_ge(outpidx, 0); + assert_lt(pidx, nop); + assert_ge(pidx, 0); + + // copy particle data to new location + // + xtmp[outpidx] = x[pidx]; + ytmp[outpidx] = y[pidx]; + ztmp[outpidx] = z[pidx]; + utmp[outpidx] = u[pidx]; + vtmp[outpidx] = v[pidx]; + wtmp[outpidx] = w[pidx]; + qtmp[outpidx] = q[pidx]; + xavgtmp[outpidx] = xavg[pidx]; + yavgtmp[outpidx] = yavg[pidx]; + zavgtmp[outpidx] = zavg[pidx]; + if (TrackParticleID) + { + ParticleIDtmp[outpidx] = ParticleID[pidx]; } } // swap the tmp particle memory with the official particle memory @@ -1214,8 +1363,12 @@ void Particles3Dcomm::sort_particles_serial( swap(_yavgtmp,_yavg); swap(_zavgtmp,_zavg); } + xavg = _xavg; + yavg = _yavg; + zavg = _zavg; - // check if the particles were sorted incorrectly + // check that the number of bins was correct + // if(true) { for(int cx=0;cx Date: Fri, 31 Jan 2014 01:10:24 +0100 Subject: [PATCH 098/118] fixed bug in "ipic cmake" command --- scripts/ipic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ipic.py b/scripts/ipic.py index 141ed769..d043f2ff 100755 --- a/scripts/ipic.py +++ b/scripts/ipic.py @@ -194,7 +194,7 @@ def 
ipic_cmake(args): sys.exit() if sourcedir!='src': - rm_command = ['rm -f', 'src']; + rm_command = ['rm', '-f', 'src']; issue_command(rm_command); ln_command = ['ln', '-s', str(sourcedir), 'src']; issue_command(ln_command) From af34642a03e6311bf6403c6ff095b5339f839698 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 3 Feb 2014 18:01:23 +0100 Subject: [PATCH 099/118] fixed ipic cmake --- scripts/ipic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/ipic.py b/scripts/ipic.py index d043f2ff..f00b018c 100755 --- a/scripts/ipic.py +++ b/scripts/ipic.py @@ -216,7 +216,10 @@ def ipic_cmake(args): def ipic_findcpph(args): # create tags file using ctags command = '''find . -name '*.cpp' -or -name '*.h' | grep -v unused | grep -v postprocessing_tools''' - issue_shell_command(cmake_command) + if(show): + print command + else: + os.system(command) def ipic_ctags(args): # create tags file using ctags From 6c7cd962139078d2684c4c5df90a15da4d3bfe2d Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 31 Jan 2014 16:31:58 +0100 Subject: [PATCH 100/118] implemented mover_PC_AoS_vec --- fields/EMfields3D.cpp | 1 + grids/Grid3DCU.cpp | 3 + include/Grid3DCU.h | 110 ++++++++ include/Parameters.h | 8 +- include/Particles3D.h | 10 +- include/Particles3Dcomm.h | 62 ++--- include/arraysfwd.h | 2 + main/Parameters.cpp | 22 +- main/iPic3Dlib.cpp | 8 +- particles/Particles3D.cpp | 487 +++++++++++++++------------------- particles/Particles3Dcomm.cpp | 16 +- 11 files changed, 376 insertions(+), 353 deletions(-) diff --git a/fields/EMfields3D.cpp b/fields/EMfields3D.cpp index 923985e1..2bcc6fef 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -422,6 +422,7 @@ void EMfields3D::sumMoments(const Particles3Dcomm* part, Grid * grid, VirtualTop int moments1dsize = moments.get_size(); for(int i=0; igetLx() / col->getNxc(); dy = col->getLy() / col->getNyc(); dz = col->getLz() / col->getNzc(); diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index 
c0a60e7e..92651196 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -139,6 +139,11 @@ class Grid3DCU // :public Grid double invdz; /** invol = inverse of volume*/ double invVOL; + /** index of last cell including ghost cells */ + // (precomputed for speed) + int cxlast; // nxc-1; + int cylast; // nyc-1; + int czlast; // nzc-1; /** node coordinate */ pfloat *pfloat_node_xcoord; pfloat *pfloat_node_ycoord; @@ -201,6 +206,111 @@ class Grid3DCU // :public Grid double getZstart() { return (zStart); } double getZend() { return (zEnd); } double getInvVOL() { return (invVOL); } + + // inline methods to calculate mesh cell and weights. + static void get_weights(double weights[8], + double w0x, double w0y, double w0z, + double w1x, double w1y, double w1z) + { + // which of the following is faster? + // + // this: + // + //const double weight00 = w0x*w0y; + //const double weight01 = w0x*w1y; + //const double weight10 = w1x*w0y; + //const double weight11 = w1x*w1y; + //weights[0] = weight00*w0z; // weight000 + //weights[1] = weight00*w1z; // weight001 + //weights[2] = weight01*w0z; // weight010 + //weights[3] = weight01*w1z; // weight011 + //weights[4] = weight10*w0z; // weight100 + //weights[5] = weight10*w1z; // weight101 + //weights[6] = weight11*w0z; // weight110 + //weights[7] = weight11*w1z; // weight111 + // + // or this: + // + weights[0] = w0x*w0y*w0z; // weight000 + weights[1] = w0x*w0y*w1z; // weight001 + weights[2] = w0x*w1y*w0z; // weight010 + weights[3] = w0x*w1y*w1z; // weight011 + weights[4] = w1x*w0y*w0z; // weight100 + weights[5] = w1x*w0y*w1z; // weight101 + weights[6] = w1x*w1y*w0z; // weight110 + weights[7] = w1x*w1y*w1z; // weight111 + } + void get_cell_coordinates( + int& cx, int& cy, int& cz, + double xpos, double ypos, double zpos) + { + // xStart marks start of domain excluding ghosts + const double rel_xpos = xpos - xStart; + const double rel_ypos = ypos - yStart; + const double rel_zpos = zpos - zStart; + // cell position minus 1 (due to 
ghost cells) + const double cxm1_pos = rel_xpos * invdx; + const double cym1_pos = rel_ypos * invdy; + const double czm1_pos = rel_zpos * invdz; + cx = 1 + int(floor(cxm1_pos)); + cy = 1 + int(floor(cym1_pos)); + cz = 1 + int(floor(czm1_pos)); + } + void make_cell_coordinates_safe(int& cx, int& cy, int& cz) + { + // if the cell is outside the domain, then treat it as + // in the nearest ghost cell. + // + if (cx < 0) cx = 0; + if (cy < 0) cy = 0; + if (cz < 0) cz = 0; + if (cx > cxlast) cx = cxlast; //nxc-1; + if (cy > cylast) cy = cylast; //nyc-1; + if (cz > czlast) cz = czlast; //nzc-1; + } + void get_safe_cell_coordinates( + int& cx, int& cy, int& cz, + double x, double y, double z) + { + get_cell_coordinates(cx,cy,cz,x,y,z); + make_cell_coordinates_safe(cx,cy,cz); + } + void get_safe_cell_and_weights( + double xpos, double ypos, double zpos, + int &cx, int& cy, int& cz, + double weights[8]) + { + //convert_xpos_to_cxpos(xpos,ypos,zpos,cxpos,cypos,czpos); + // xStart marks start of domain excluding ghosts + const double rel_xpos = xpos - xStart; + const double rel_ypos = ypos - yStart; + const double rel_zpos = zpos - zStart; + // cell position minus 1 (due to ghost cells) + const double cxm1_pos = rel_xpos * invdx; + const double cym1_pos = rel_ypos * invdy; + const double czm1_pos = rel_zpos * invdz; + // + cx = 1 + int(floor(cxm1_pos)); + cy = 1 + int(floor(cym1_pos)); + cz = 1 + int(floor(czm1_pos)); + + make_cell_coordinates_safe(cx,cy,cz); + + // fraction of the distance from the right of the cell + const double w1x = cx - cxm1_pos; + const double w1y = cy - cym1_pos; + const double w1z = cz - czm1_pos; + // fraction of distance from the left + const double w0x = 1.-w1x; + const double w0y = 1.-w1y; + const double w0z = 1.-w1z; + + get_weights(weights, w0x, w0y, w0z, w1x, w1y, w1z); + } + void get_safe_cell_and_weights(double xpos[3], int cx[3], double weights[8]) + { + get_safe_cell_and_weights(xpos[0],xpos[1],xpos[2],cx[0],cx[1],cx[2],weights); + } }; 
typedef Grid3DCU Grid; diff --git a/include/Parameters.h b/include/Parameters.h index fac04e9e..819ea7f0 100644 --- a/include/Parameters.h +++ b/include/Parameters.h @@ -13,10 +13,10 @@ namespace Parameters AoSvec, SoAvec, // for mover type - SoAvec_onesort, - AoSvec_onesort, - SoAvec_resort, - AoSvec_resort, + SoA_vec_onesort, + AoS_vec_onesort, + SoA_vec_resort, + AoS_vec_resort, }; void init_parameters(); diff --git a/include/Particles3D.h b/include/Particles3D.h index 58c8fb49..ce0595e6 100644 --- a/include/Particles3D.h +++ b/include/Particles3D.h @@ -59,9 +59,11 @@ class Particles3D:public Particles3Dcomm { /** mover with a Predictor-Corrector Scheme */ void mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** array-of-structs version of mover_PC */ - void mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EMf); void mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf); + /* vectorized version of previous */ void mover_PC_AoS_vec(Grid * grid, VirtualTopology3D * vct, Field * EMf); + /* this computes garbage */ + void mover_PC_AoS_vec_onesort(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** vectorized version of mover_PC **/ void mover_PC_vectorized(Grid * grid, VirtualTopology3D * vct, Field * EMf); /** communicate particle after moving them */ @@ -84,6 +86,12 @@ class Particles3D:public Particles3Dcomm { void MaxwellianFromFluidCell(Grid* grid, Collective *col, int is, int i, int j, int k, int &ip, double *x, double *y, double *z, double *q, double *vx, double *vy, double *vz, unsigned long* ParticleID); #endif + private: + + inline void get_field_components_for_cell( + arr1_double_get field_components[8], + const_arr4_double fieldForPcls, + int cx,int cy,int cz); }; diff --git a/include/Particles3Dcomm.h b/include/Particles3Dcomm.h index 3299d243..5c97a8ba 100644 --- a/include/Particles3Dcomm.h +++ b/include/Particles3Dcomm.h @@ -76,46 +76,6 @@ class Particles3Dcomm // :public Particles void sort_particles_serial(Grid 
* grid, VirtualTopology3D * vct); void sort_particles_serial_AoS(Grid * grid, VirtualTopology3D * vct); void sort_particles_serial_SoA(Grid * grid, VirtualTopology3D * vct); - void get_safe_cell_for_pos( - int& cx, int& cy, int& cz, - pfloat xpos, pfloat ypos, pfloat zpos) - { - // xstart is left edge of domain excluding ghost cells - // cx=0 for ghost cell layer. - cx = 1 + int(floor((xpos - xstart) * inv_dx)); - cy = 1 + int(floor((ypos - ystart) * inv_dy)); - cz = 1 + int(floor((zpos - zstart) * inv_dz)); - // - // if the cell is outside the domain, then treat it as - // in the nearest ghost cell. - // - if (cx < 0) cx = 0; - if (cy < 0) cy = 0; - if (cz < 0) cz = 0; - // number of cells in x direction including ghosts is nxc - if (cx >= nxc) cx = nxc-1; - if (cy >= nyc) cy = nyc-1; - if (cz >= nzc) cz = nzc-1; - } - - /*! version that assumes particle is in domain */ - void get_cell_for_pos_in_domain( - int& cx, int& cy, int& cz, - pfloat xpos, pfloat ypos, pfloat zpos) - { - // xstart is left edge of domain excluding ghost cells - // cx=0 for ghost cell layer. 
- cx = 1 + int(floor((xpos - xstart) * inv_dx)); - cy = 1 + int(floor((ypos - ystart) * inv_dy)); - cz = 1 + int(floor((zpos - zstart) * inv_dz)); - // - assert_le(0,cx); - assert_le(0,cy); - assert_le(0,cz); - assert_le(cx,nxc); - assert_le(cy,nyc); - assert_le(cz,nzc); - } // get accessors for optional arrays // @@ -290,10 +250,13 @@ class Particles3Dcomm // :public Particles int BirthRank[2]; /** number of variables to be stored in buffer for communication for each particle */ int nVar; - /** Simulation domain lengths */ - double xstart, xend, ystart, yend, zstart, zend, invVOL; /** time step */ double dt; + // + // Copies of grid data (should just put pointer to Grid in this class) + // + /** Simulation domain lengths */ + double xstart, xend, ystart, yend, zstart, zend, invVOL; /** Lx = simulation box length - x direction */ double Lx; /** Ly = simulation box length - y direction */ @@ -306,6 +269,13 @@ class Particles3Dcomm // :public Particles int nxn, nyn, nzn; /** number of grid cells */ int nxc, nyc, nzc; + // convenience values from grid + double inv_dx; + double inv_dy; + double inv_dz; + // + // Communication variables + // /** buffers for communication */ /** size of sending buffers for exiting particles, DEFINED IN METHOD "COMMUNICATE" */ int buffer_size; @@ -376,6 +346,9 @@ class Particles3Dcomm // :public Particles int bcPfaceZright; /** Boundary Condition Particles: FaceYleft */ int bcPfaceZleft; + // + // Other variables + // /** speed of light in vacuum */ double c; /** restart variable for loading particles from restart file */ @@ -388,11 +361,6 @@ class Particles3Dcomm // :public Particles double Q_removed; /** density of the injection of the particles */ double Ninj; - - // convenience values from grid - double inv_dx; - double inv_dy; - double inv_dz; }; typedef Particles3Dcomm Particles; diff --git a/include/arraysfwd.h b/include/arraysfwd.h index 86dbd5d8..0afc2826 100644 --- a/include/arraysfwd.h +++ b/include/arraysfwd.h @@ -63,11 +63,13 
@@ typedef iPic3D::array_fetch1 arr1_double_fetch; typedef iPic3D::array_get1 arr1_double_get; typedef iPic3D::array_get1 arr1_pfloat_get; typedef iPic3D::array_fetch2 arr2_double_fetch; +typedef iPic3D::array_fetch3 arr3_double_fetch; #else typedef double* arr1_double_fetch; typedef double* arr1_double_get; typedef pfloat* arr1_pfloat_get; typedef double** arr2_double_fetch; +typedef double*** arr3_double_fetch; #endif #endif diff --git a/main/Parameters.cpp b/main/Parameters.cpp index ca8ed480..60c45fc5 100644 --- a/main/Parameters.cpp +++ b/main/Parameters.cpp @@ -7,7 +7,7 @@ using namespace Parameters; bool Parameters::get_VECTORIZE_MOMENTS() { return false; } // supported options: SoA AoS Parameters::Enum Parameters::get_MOMENTS_TYPE() { return SoA; } -// supported options: SoA AoS AoSvec_onesort SoAvec_resort +// supported options: SoA AoS AoSvec AoS_vec_onesort SoA_vec_resort Parameters::Enum Parameters::get_MOVER_TYPE() { return SoA; } //********** derived parameters ********* @@ -19,21 +19,21 @@ static bool SORTING_SOA; void Parameters::init_parameters() { RESORTING_PARTICLES = - get_MOVER_TYPE()==SoAvec_resort - || get_MOVER_TYPE()==AoSvec_resort; + get_MOVER_TYPE()==SoA_vec_resort + || get_MOVER_TYPE()==AoS_vec_resort; SORTING_PARTICLES = get_VECTORIZE_MOMENTS() - || get_MOVER_TYPE()==SoAvec_onesort - || get_MOVER_TYPE()==AoSvec_onesort - || get_MOVER_TYPE()==SoAvec_resort - || get_MOVER_TYPE()==AoSvec_resort; + || get_MOVER_TYPE()==SoA_vec_onesort + || get_MOVER_TYPE()==AoS_vec_onesort + || get_MOVER_TYPE()==SoA_vec_resort + || get_MOVER_TYPE()==AoS_vec_resort; SORTING_SOA = get_VECTORIZE_MOMENTS() - || get_MOVER_TYPE()==SoAvec_onesort - || get_MOVER_TYPE()==SoAvec_resort; + || get_MOVER_TYPE()==SoA_vec_onesort + || get_MOVER_TYPE()==SoA_vec_resort; USING_AOS = get_MOMENTS_TYPE()==AoS || get_MOVER_TYPE()==AoS - || get_MOVER_TYPE()==AoSvec_onesort - || get_MOVER_TYPE()==AoSvec_resort; + || get_MOVER_TYPE()==AoS_vec_onesort + || 
get_MOVER_TYPE()==AoS_vec_resort; } bool Parameters::get_RESORTING_PARTICLES() { return RESORTING_PARTICLES; } diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 9f60f114..33f9a96c 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -285,16 +285,18 @@ bool c_Solver::ParticlesMover() { case Parameters::SoA: part[i].mover_PC(grid, vct, EMf); break; - case Parameters::SoAvec_resort: + case Parameters::SoA_vec_resort: part[i].mover_PC_vectorized(grid, vct, EMf); break; case Parameters::AoS: part[i].mover_PC_AoS(grid, vct, EMf); - //part[i].mover_PC_AoS2(grid, vct, EMf); break; - case Parameters::AoSvec_onesort: + case Parameters::AoSvec: part[i].mover_PC_AoS_vec(grid, vct, EMf); break; + case Parameters::AoS_vec_onesort: + part[i].mover_PC_AoS_vec_onesort(grid, vct, EMf); + break; default: unsupported_value_error(Parameters::get_MOVER_TYPE()); } diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index dd0af36b..5186a4d7 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -304,6 +304,47 @@ void Particles3D::AddPerturbationJ(double deltaBoB, double kx, double ky, double } } +inline void Particles3D::get_field_components_for_cell( + arr1_double_get field_components[8], + const_arr4_double fieldForPcls, + int cx,int cy,int cz) +{ + // interface to the right of cell + const int ix = cx+1; + const int iy = cy+1; + const int iz = cz+1; + + // is this faster? + // + //field_components[0] = fieldForPcls[ix][iy][iz]; // field000 + //field_components[1] = fieldForPcls[ix][iy][cz]; // field001 + //field_components[2] = fieldForPcls[ix][cy][iz]; // field010 + //field_components[3] = fieldForPcls[ix][cy][cz]; // field011 + //field_components[4] = fieldForPcls[cx][iy][iz]; // field100 + //field_components[5] = fieldForPcls[cx][iy][cz]; // field101 + //field_components[6] = fieldForPcls[cx][cy][iz]; // field110 + //field_components[7] = fieldForPcls[cx][cy][cz]; // field111 + // + // or is this? 
+ // + // creating these aliases seems to accelerate this method (by about 30%?) + // on the Xeon host processor, suggesting deficiency in the optimizer. + // + arr3_double_fetch field0 = fieldForPcls[ix]; + arr3_double_fetch field1 = fieldForPcls[cx]; + arr2_double_fetch field00 = field0[iy]; + arr2_double_fetch field01 = field0[cy]; + arr2_double_fetch field10 = field1[iy]; + arr2_double_fetch field11 = field1[cy]; + field_components[0] = field00[iz]; // field000 + field_components[1] = field00[cz]; // field001 + field_components[2] = field01[iz]; // field010 + field_components[3] = field01[cz]; // field011 + field_components[4] = field10[iz]; // field100 + field_components[5] = field10[cz]; // field101 + field_components[6] = field11[iz]; // field110 + field_components[7] = field11[cz]; // field111 +} /** explicit mover */ void Particles3D::mover_explicit(Grid * grid, VirtualTopology3D * vct, Field * EMf) { @@ -359,9 +400,9 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { if (iy > nyc) iy = nyc; if (iz > nzc) iz = nzc; // index of cell of particle; - //const int cx = ix - 1; - //const int cy = iy - 1; - //const int cz = iz - 1; + const int cx = ix - 1; + const int cy = iy - 1; + const int cz = iz - 1; const double xi0 = xavg - grid->getXN(ix-1); const double eta0 = yavg - grid->getYN(iy-1); @@ -405,14 +446,7 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { // on the Xeon host, processor, suggesting deficiency in the optimizer. 
// arr1_double_get field_components[8]; - field_components[0] = fieldForPcls[ix ][iy ][iz ]; // field000 - field_components[1] = fieldForPcls[ix ][iy ][iz-1]; // field001 - field_components[2] = fieldForPcls[ix ][iy-1][iz ]; // field010 - field_components[3] = fieldForPcls[ix ][iy-1][iz-1]; // field011 - field_components[4] = fieldForPcls[ix-1][iy ][iz ]; // field100 - field_components[5] = fieldForPcls[ix-1][iy ][iz-1]; // field101 - field_components[6] = fieldForPcls[ix-1][iy-1][iz ]; // field110 - field_components[7] = fieldForPcls[ix-1][iy-1][iz-1]; // field111 + get_field_components_for_cell(field_components,fieldForPcls,cx,cy,cz); for(int c=0; c<8; c++) { @@ -457,110 +491,54 @@ void Particles3D::mover_PC(Grid * grid, VirtualTopology3D * vct, Field * EMf) { { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } } -void Particles3D::mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EMf) +void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf) { convertParticlesToAoS(); - SpeciesParticle * pcls = fetch_pcls(); #pragma omp master if (vct->getCartesian_rank() == 0) { cout << "*** MOVER species " << ns << " ***" << NiterMover << " ITERATIONS ****" << endl; } const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + SpeciesParticle * pcls = fetch_pcls(); #pragma omp master { timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); } - const pfloat dto2 = .5 * dt, qdto2mc = qom * dto2 / c; + const double dto2 = .5 * dt, qdto2mc = qom * dto2 / c; #pragma omp for schedule(static) for (int pidx = 0; pidx < nop; pidx++) { // copy the particle - SpeciesParticle& pcl = pcls[pidx]; - const pfloat xorig = pcl.get_x(); - const pfloat yorig = pcl.get_y(); - const pfloat zorig = pcl.get_z(); - const pfloat uorig = pcl.get_u(); - const pfloat vorig = pcl.get_v(); - const pfloat worig = pcl.get_w(); - pfloat xavg = xorig; - pfloat yavg = yorig; - pfloat zavg = zorig; - pfloat uavg; - pfloat vavg; - pfloat wavg; + SpeciesParticle* pcl = &pcls[pidx]; + 
ALIGNED(pcl); + const double xorig = pcl->get_x(); + const double yorig = pcl->get_y(); + const double zorig = pcl->get_z(); + const double uorig = pcl->get_u(); + const double vorig = pcl->get_v(); + const double worig = pcl->get_w(); + double xavg = xorig; + double yavg = yorig; + double zavg = zorig; + double uavg; + double vavg; + double wavg; // calculate the average velocity iteratively for (int innter = 0; innter < NiterMover; innter++) { - // interpolation G-->P - const pfloat ixd = floor((xavg - xstart) * inv_dx); - const pfloat iyd = floor((yavg - ystart) * inv_dy); - const pfloat izd = floor((zavg - zstart) * inv_dz); - // interface of index to right of cell - int ix = 2 + int(ixd); - int iy = 2 + int(iyd); - int iz = 2 + int(izd); - // use field data of closest cell in domain + // compute weights for field components // - if (ix < 1) ix = 1; - if (iy < 1) iy = 1; - if (iz < 1) iz = 1; - if (ix > nxc) ix = nxc; - if (iy > nyc) iy = nyc; - if (iz > nzc) iz = nzc; - // index of cell of particle; - //const int cx = ix - 1; - //const int cy = iy - 1; - //const int cz = iz - 1; - - const pfloat xi0 = xavg - grid->get_pfloat_XN(ix-1); - const pfloat eta0 = yavg - grid->get_pfloat_YN(iy-1); - const pfloat zeta0 = zavg - grid->get_pfloat_ZN(iz-1); - const pfloat xi1 = grid->get_pfloat_XN(ix) - xavg; - const pfloat eta1 = grid->get_pfloat_YN(iy) - yavg; - const pfloat zeta1 = grid->get_pfloat_ZN(iz) - zavg; - - pfloat Exl = 0.0; - pfloat Eyl = 0.0; - pfloat Ezl = 0.0; - pfloat Bxl = 0.0; - pfloat Byl = 0.0; - pfloat Bzl = 0.0; - - pfloat weights[8]; - const pfloat weight0 = invVOL*xi0; - const pfloat weight1 = invVOL*xi1; - const pfloat weight00 = weight0*eta0; - const pfloat weight01 = weight0*eta1; - const pfloat weight10 = weight1*eta0; - const pfloat weight11 = weight1*eta1; - weights[0] = weight00*zeta0; // weight000 - weights[1] = weight00*zeta1; // weight001 - weights[2] = weight01*zeta0; // weight010 - weights[3] = weight01*zeta1; // weight011 - 
weights[4] = weight10*zeta0; // weight100 - weights[5] = weight10*zeta1; // weight101 - weights[6] = weight11*zeta0; // weight110 - weights[7] = weight11*zeta1; // weight111 - //weights[0] = xi0 * eta0 * zeta0 * qi * invVOL; // weight000 - //weights[1] = xi0 * eta0 * zeta1 * qi * invVOL; // weight001 - //weights[2] = xi0 * eta1 * zeta0 * qi * invVOL; // weight010 - //weights[3] = xi0 * eta1 * zeta1 * qi * invVOL; // weight011 - //weights[4] = xi1 * eta0 * zeta0 * qi * invVOL; // weight100 - //weights[5] = xi1 * eta0 * zeta1 * qi * invVOL; // weight101 - //weights[6] = xi1 * eta1 * zeta0 * qi * invVOL; // weight110 - //weights[7] = xi1 * eta1 * zeta1 * qi * invVOL; // weight111 + double weights[8]; + int cx,cy,cz; + grid->get_safe_cell_and_weights(xavg,yavg,zavg,cx,cy,cz,weights); - // creating these aliases seems to accelerate this method by about 30% - // on the Xeon host, processor, suggesting deficiency in the optimizer. - // arr1_double_get field_components[8]; - field_components[0] = fieldForPcls[ix ][iy ][iz ]; // field000 - field_components[1] = fieldForPcls[ix ][iy ][iz-1]; // field001 - field_components[2] = fieldForPcls[ix ][iy-1][iz ]; // field010 - field_components[3] = fieldForPcls[ix ][iy-1][iz-1]; // field011 - field_components[4] = fieldForPcls[ix-1][iy ][iz ]; // field100 - field_components[5] = fieldForPcls[ix-1][iy ][iz-1]; // field101 - field_components[6] = fieldForPcls[ix-1][iy-1][iz ]; // field110 - field_components[7] = fieldForPcls[ix-1][iy-1][iz-1]; // field111 - + get_field_components_for_cell(field_components,fieldForPcls,cx,cy,cz); + + double Exl = 0.0; + double Eyl = 0.0; + double Ezl = 0.0; + double Bxl = 0.0; + double Byl = 0.0; + double Bzl = 0.0; for(int c=0; c<8; c++) { Bxl += weights[c] * field_components[c][0]; @@ -593,18 +571,18 @@ void Particles3D::mover_PC_AoS2(Grid * grid, VirtualTopology3D * vct, Field * EM zavg = zorig + wavg * dto2; } // end of iteration // update the final position and velocity - pcl.set_x(xorig + uavg * 
dt); - pcl.set_y(yorig + vavg * dt); - pcl.set_z(zorig + wavg * dt); - pcl.set_u(2.0 * uavg - uorig); - pcl.set_v(2.0 * vavg - vorig); - pcl.set_w(2.0 * wavg - worig); - } + pcl->set_x(xorig + uavg * dt); + pcl->set_y(yorig + vavg * dt); + pcl->set_z(zorig + wavg * dt); + pcl->set_u(2.0 * uavg - uorig); + pcl->set_v(2.0 * vavg - vorig); + pcl->set_w(2.0 * wavg - worig); + } // END OF ALL THE PARTICLES #pragma omp master { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } } -void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf) +void Particles3D::mover_PC_AoS_vec(Grid * grid, VirtualTopology3D * vct, Field * EMf) { convertParticlesToAoS(); #pragma omp master @@ -613,162 +591,141 @@ void Particles3D::mover_PC_AoS(Grid * grid, VirtualTopology3D * vct, Field * EMf } const_arr4_pfloat fieldForPcls = EMf->get_fieldForPcls(); + const int NUM_PCLS_MOVED_AT_A_TIME = 8; + // make sure that we won't overrun memory + assert_divides(NUM_PCLS_MOVED_AT_A_TIME,npmax); + SpeciesParticle * pcls = fetch_pcls(); #pragma omp master { timeTasks_begin_task(TimeTasks::MOVER_PCL_MOVING); } const double dto2 = .5 * dt, qdto2mc = qom * dto2 / c; #pragma omp for schedule(static) - // why does single precision make no difference in execution speed? 
- //#pragma simd vectorlength(VECTOR_WIDTH) - for (int pidx = 0; pidx < nop; pidx++) { - // copy the particle - SpeciesParticle* pcl = &pcls[pidx]; - ALIGNED(pcl); - const double xorig = pcl->get_x(); - const double yorig = pcl->get_y(); - const double zorig = pcl->get_z(); - const double uorig = pcl->get_u(); - const double vorig = pcl->get_v(); - const double worig = pcl->get_w(); - double xavg = xorig; - double yavg = yorig; - double zavg = zorig; - double uavg; - double vavg; - double wavg; + for (int pidx = 0; pidx < nop; pidx+=NUM_PCLS_MOVED_AT_A_TIME) + { + // copy the particles + SpeciesParticle* pcl[NUM_PCLS_MOVED_AT_A_TIME]; + for(int i=0;iget_x(j); + uorig[i][j] = pcl[i]->get_u(j); + } // calculate the average velocity iteratively for (int innter = 0; innter < NiterMover; innter++) { // compute weights for field components // - double weights[8]; - // xstart marks start of domain excluding ghosts - const double rel_xpos = xavg - xstart; - const double rel_ypos = yavg - ystart; - const double rel_zpos = zavg - zstart; - // cell position minus 1 (due to ghost cells) - const double cxm1_pos = rel_xpos * inv_dx; - const double cym1_pos = rel_ypos * inv_dy; - const double czm1_pos = rel_zpos * inv_dz; - // - int cx = 1 + int(floor(cxm1_pos)); - int cy = 1 + int(floor(cym1_pos)); - int cz = 1 + int(floor(czm1_pos)); - - // if the cell is outside the domain, then treat it as - // in the nearest ghost cell. 
- // - if (cx < 0) cx = 0; - if (cy < 0) cy = 0; - if (cz < 0) cz = 0; - // number of cells in x direction including ghosts is nxc - if (cx >= nxc) cx = nxc-1; - if (cy >= nyc) cy = nyc-1; - if (cz >= nzc) cz = nzc-1; - - // index of interface to right of cell - const int ix = cx + 1; - const int iy = cy + 1; - const int iz = cz + 1; - - // fraction of the distance from the right of the cell - const double w1x = cx - cxm1_pos; - const double w1y = cy - cym1_pos; - const double w1z = cz - czm1_pos; - // fraction of distance from the left - const double w0x = 1-w1x; - const double w0y = 1-w1y; - const double w0z = 1-w1z; - //const double weight00 = w0x*w0y; - //const double weight01 = w0x*w1y; - //const double weight10 = w1x*w0y; - //const double weight11 = w1x*w1y; - //weights[0] = weight00*w0z; // weight000 - //weights[1] = weight00*w1z; // weight001 - //weights[2] = weight01*w0z; // weight010 - //weights[3] = weight01*w1z; // weight011 - //weights[4] = weight10*w0z; // weight100 - //weights[5] = weight10*w1z; // weight101 - //weights[6] = weight11*w0z; // weight110 - //weights[7] = weight11*w1z; // weight111 - // - weights[0] = w0x*w0y*w0z; // weight000 - weights[1] = w0x*w0y*w1z; // weight001 - weights[2] = w0x*w1y*w0z; // weight010 - weights[3] = w0x*w1y*w1z; // weight011 - weights[4] = w1x*w0y*w0z; // weight100 - weights[5] = w1x*w0y*w1z; // weight101 - weights[6] = w1x*w1y*w0z; // weight110 - weights[7] = w1x*w1y*w1z; // weight111 - - pfloat Exl = 0.0; - pfloat Eyl = 0.0; - pfloat Ezl = 0.0; - pfloat Bxl = 0.0; - pfloat Byl = 0.0; - pfloat Bzl = 0.0; + double weights[NUM_PCLS_MOVED_AT_A_TIME][8]; + int cx[NUM_PCLS_MOVED_AT_A_TIME][3]; + for(int i=0;iget_safe_cell_and_weights(xavg[i],cx[i],weights[i]); + } - // creating these aliases seems to accelerate this method by about 30% - // on the Xeon host, processor, suggesting deficiency in the optimizer. 
- // - arr1_double_get field_components[8]; - field_components[0] = fieldForPcls[ix][iy][iz]; // field000 - field_components[1] = fieldForPcls[ix][iy][cz]; // field001 - field_components[2] = fieldForPcls[ix][cy][iz]; // field010 - field_components[3] = fieldForPcls[ix][cy][cz]; // field011 - field_components[4] = fieldForPcls[cx][iy][iz]; // field100 - field_components[5] = fieldForPcls[cx][iy][cz]; // field101 - field_components[6] = fieldForPcls[cx][cy][iz]; // field110 - field_components[7] = fieldForPcls[cx][cy][cz]; // field111 + arr1_double_get field_components[NUM_PCLS_MOVED_AT_A_TIME][8]; + for(int i=0;iset_x(xorig + uavg * dt); - pcl->set_y(yorig + vavg * dt); - pcl->set_z(zorig + wavg * dt); - pcl->set_u(2.0 * uavg - uorig); - pcl->set_v(2.0 * vavg - vorig); - pcl->set_w(2.0 * wavg - worig); - } // END OF ALL THE PARTICLES + // #pragma simd collapse(2) + for(int i=0;iset_x(j, xorig[i][j] + uavg[i][j] * dt); + pcl[i]->set_u(j, 2.*uavg[i][j] - uorig[i][j]); + } + } #pragma omp master { timeTasks_end_task(TimeTasks::MOVER_PCL_MOVING); } } -// this currently computes garbage but execution time -// suggests bound on performance. For correct execution -// would need to sort by xavg with each iteration -// like in mover_PC_vectorized -void Particles3D::mover_PC_AoS_vec( +// This currently computes extrapolated values based on field in +// original mesh cell (unstable?), but execution time suggests +// bound on performance. For correct execution would need to +// sort by xavg with each iteration like in mover_PC_vectorized. 
+// But in fact this does not run any faster than mover_PC_AoS +// +void Particles3D::mover_PC_AoS_vec_onesort( Grid * grid, VirtualTopology3D * vct, Field * EMf) { convertParticlesToAoS(); @@ -789,20 +746,11 @@ void Particles3D::mover_PC_AoS_vec( for(int cz=0;cz= nxc) cx = nxc-1; - if (cy >= nyc) cy = nyc-1; - if (cz >= nzc) cz = nzc-1; - - // index of interface to right of cell - const int ix = cx + 1; - const int iy = cy + 1; - const int iz = cz + 1; // fraction of the distance from the right of the cell const double w1x = cx - cxm1_pos; @@ -871,14 +799,16 @@ void Particles3D::mover_PC_AoS_vec( const double w0y = 1-w1y; const double w0z = 1-w1z; // - weights[0] = w0x*w0y*w0z; // weight000 - weights[1] = w0x*w0y*w1z; // weight001 - weights[2] = w0x*w1y*w0z; // weight010 - weights[3] = w0x*w1y*w1z; // weight011 - weights[4] = w1x*w0y*w0z; // weight100 - weights[5] = w1x*w0y*w1z; // weight101 - weights[6] = w1x*w1y*w0z; // weight110 - weights[7] = w1x*w1y*w1z; // weight111 + Grid::get_weights(weights, w0x, w0y, w0z, w1x, w1y, w1z); + + //if(false) // this would fail + //{ + // int cx_,cy_,cz_; + // grid->get_safe_cell_coordinates(xavg,yavg,zavg,cx_,cy_,cz_); + // assert_eq(cx,cx_); + // assert_eq(cy,cy_); + // assert_eq(cz,cz_); + //} pfloat Exl = 0.0; pfloat Eyl = 0.0; @@ -886,7 +816,6 @@ void Particles3D::mover_PC_AoS_vec( pfloat Bxl = 0.0; pfloat Byl = 0.0; pfloat Bzl = 0.0; - for(int c=0; c<8; c++) { Bxl += weights[c] * field_components[c][0]; diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index a3ac877d..7183eb16 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -1044,7 +1044,7 @@ void Particles3Dcomm::sort_particles_serial_AoS( const SpeciesParticle& pcl = get_pcl(pidx); // get the cell indices of the particle int cx,cy,cz; - get_safe_cell_for_pos(cx,cy,cz,pcl.get_x(),pcl.get_y(),pcl.get_z()); + grid->get_safe_cell_coordinates(cx,cy,cz,pcl.get_x(),pcl.get_y(),pcl.get_z()); // increment the number of 
particles in bucket of this particle (*numpcls_in_bucket)[cx][cy][cz]++; @@ -1070,7 +1070,7 @@ void Particles3Dcomm::sort_particles_serial_AoS( const SpeciesParticle& pcl = get_pcl(pidx); // get the cell indices of the particle int cx,cy,cz; - get_safe_cell_for_pos(cx,cy,cz,pcl.get_x(),pcl.get_y(),pcl.get_z()); + grid->get_safe_cell_coordinates(cx,cy,cz,pcl.get_x(),pcl.get_y(),pcl.get_z()); // compute where the data should go const int numpcls_now = (*numpcls_in_bucket_now)[cx][cy][cz]++; @@ -1132,7 +1132,7 @@ void Particles3Dcomm::sort_particles_serial_SoA( // get the cell indices of the particle // int cx,cy,cz; - get_safe_cell_for_pos(cx,cy,cz,x[pidx],y[pidx],z[pidx]); + grid->get_safe_cell_coordinates(cx,cy,cz,x[pidx],y[pidx],z[pidx]); // // is it better just to recompute this? // @@ -1165,7 +1165,7 @@ void Particles3Dcomm::sort_particles_serial_SoA( // get the cell indices of the particle // int cx,cy,cz; - get_safe_cell_for_pos(cx,cy,cz,x[pidx],y[pidx],z[pidx]); + grid->get_safe_cell_coordinates(cx,cy,cz,x[pidx],y[pidx],z[pidx]); // //cx = xcell[pidx]; //cy = ycell[pidx]; @@ -1231,7 +1231,7 @@ void Particles3Dcomm::sort_particles_serial_SoA( // confirm that particle is in correct cell { int cx_,cy_,cz_; - get_safe_cell_for_pos(cx_,cy_,cz_,x[pidx],y[pidx],z[pidx]); + grid->get_safe_cell_coordinates(cx_,cy_,cz_,x[pidx],y[pidx],z[pidx]); if((cx_!=cx) ||(cy_!=cy) ||(cz_!=cz)) @@ -1285,7 +1285,7 @@ void Particles3Dcomm::sort_particles_serial_SoA_by_xavg( // get the cell indices of the particle // int cx,cy,cz; - get_safe_cell_for_pos(cx,cy,cz,xavg[pidx],yavg[pidx],zavg[pidx]); + grid->get_safe_cell_coordinates(cx,cy,cz,xavg[pidx],yavg[pidx],zavg[pidx]); // // is it better just to recompute this? 
// @@ -1318,7 +1318,7 @@ void Particles3Dcomm::sort_particles_serial_SoA_by_xavg( // get the cell indices of the particle // int cx,cy,cz; - get_safe_cell_for_pos(cx,cy,cz,xavg[pidx],yavg[pidx],zavg[pidx]); + grid->get_safe_cell_coordinates(cx,cy,cz,xavg[pidx],yavg[pidx],zavg[pidx]); // //cx = xcell[pidx]; //cy = ycell[pidx]; @@ -1396,7 +1396,7 @@ void Particles3Dcomm::sort_particles_serial_SoA_by_xavg( if(true) { int cx_,cy_,cz_; - get_safe_cell_for_pos(cx_,cy_,cz_,xavg[pidx],yavg[pidx],zavg[pidx]); + grid->get_safe_cell_coordinates(cx_,cy_,cz_,xavg[pidx],yavg[pidx],zavg[pidx]); if((cx_!=cx) ||(cy_!=cy) ||(cz_!=cz)) From fe2a01ab5233ec36963ea3d97d446ec93ce987cc Mon Sep 17 00:00:00 2001 From: eajohnson Date: Sat, 22 Feb 2014 11:20:50 +0100 Subject: [PATCH 101/118] created utility/Basic.cpp for methods that need not be inline --- include/Basic.h | 550 ++++++---------------------------------------- utility/Basic.cpp | 525 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 592 insertions(+), 483 deletions(-) create mode 100644 utility/Basic.cpp diff --git a/include/Basic.h b/include/Basic.h index 2dd7da6c..09a0d54c 100644 --- a/include/Basic.h +++ b/include/Basic.h @@ -5,18 +5,9 @@ developers: Stefano Markidis, Giovanni Lapenta ********************************************************************************************/ #ifndef Basic_H #define Basic_H - -#include +#include "arraysfwd.h" #include -#include "MPIdata.h" -#include "EllipticF.h" -#include "Alloc.h" - -using std::cout; -using std::endl; - - /** * * Basic operations defined. 
This library provides methods to calculate: @@ -39,400 +30,120 @@ using std::endl; /** method to calculate the parallel dot product with vect1, vect2 having the ghost cells*/ -inline double dotP(double *vect1, double *vect2, int n) { - double result = 0; - double local_result = 0; - for (register int i = 0; i < n; i++) - local_result += vect1[i] * vect2[i]; - MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - return (result); - -} +double dotP(double *vect1, double *vect2, int n); /** method to calculate dot product */ -inline double dot(double *vect1, double *vect2, int n) { - double result = 0; - for (int i = 0; i < n; i++) - result += vect1[i] * vect2[i]; - return (result); -} +double dot(double *vect1, double *vect2, int n); /** method to calculate the square norm of a vector */ -inline double norm2(double **vect, int nx, int ny) { - double result = 0; - for (int i = 0; i < nx; i++) - for (int j = 0; j < ny; j++) - result += vect[i][j] * vect[i][j]; - return (result); -} +double norm2(double **vect, int nx, int ny); /** method to calculate the square norm of a vector */ -inline double norm2(const arr3_double vect, int nx, int ny) { - double result = 0; - for (int i = 0; i < nx; i++) - for (int j = 0; j < ny; j++) - result += vect.get(i,j,0) * vect.get(i,j,0); - return (result); -} +double norm2(const arr3_double vect, int nx, int ny); /** method to calculate the square norm of a vector */ -inline double norm2(double *vect, int nx) { - double result = 0; - for (int i = 0; i < nx; i++) - result += vect[i] * vect[i]; - return (result); -} - - - +double norm2(double *vect, int nx); /** method to calculate the parallel dot product */ -inline double norm2P(const arr3_double vect, int nx, int ny, int nz) { - double result = 0; - double local_result = 0; - for (int i = 0; i < nx; i++) - for (int j = 0; j < ny; j++) - for (int k = 0; k < nz; k++) - local_result += vect.get(i,j,k) * vect.get(i,j,k); - - MPI_Allreduce(&local_result, &result, 
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - return (result); -} +double norm2P(const arr3_double vect, int nx, int ny, int nz); /** method to calculate the parallel norm of a vector on different processors with the ghost cell */ -inline double norm2P(double *vect, int n) { - double result = 0; - double local_result = 0; - for (int i = 0; i < n; i++) - local_result += vect[i] * vect[i]; - MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - return (result); -} +double norm2P(double *vect, int n); /** method to calculate the parallel norm of a vector on different processors with the gost cell*/ -inline double normP(double *vect, int n) { - double result = 0.0; - double local_result = 0.0; - for (register int i = 0; i < n; i++) - local_result += vect[i] * vect[i]; - - - MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - - return (sqrt(result)); - -} +double normP(double *vect, int n); /** method to calculate the difference of two vectors*/ -inline void sub(double *res, double *vect1, double *vect2, int n) { - for (register int i = 0; i < n; i++) - res[i] = vect1[i] - vect2[i]; -} +void sub(double *res, double *vect1, double *vect2, int n); /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(double *vect1, double *vect2, int n) { - for (register int i = 0; i < n; i++) - vect1[i] += vect2[i]; - - -} +void sum(double *vect1, double *vect2, int n); /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) += vect2.get(i,j,k); -} - +void sum(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz); /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(arr3_double vect1, const 
arr3_double vect2, int nx, int ny) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect1.fetch(i,j,0) += vect2.get(i,j,0); -} - +void sum(arr3_double vect1, const arr3_double vect2, int nx, int ny); /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(arr3_double vect1, const arr4_double vect2, int nx, int ny, int nz, int ns) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) += vect2.get(ns,i,j,k); -} - +void sum(arr3_double vect1, const arr4_double vect2, int nx, int ny, int nz, int ns); /** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ -inline void sum(arr3_double vect1, const arr4_double vect2, int nx, int ny, int ns) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect1.fetch(i,j,0) += vect2.get(ns,i,j,0); -} +void sum(arr3_double vect1, const arr4_double vect2, int nx, int ny, int ns); /** method to calculate the subtraction of two vectors vector1 = vector1 - vector2*/ -inline void sub(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) -= vect2.get(i,j,k); -} - +void sub(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz); /** method to calculate the subtraction of two vectors vector1 = vector1 - vector2*/ -inline void sub(arr3_double vect1, const arr3_double vect2, int nx, int ny) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect1.fetch(i,j,0) -= vect2.get(i,j,0); -} - - +void sub(arr3_double vect1, const arr3_double vect2, int nx, int ny); /** method to sum 4 vectors vector1 = alfa*vector1 + beta*vector2 + gamma*vector3 + delta*vector4 */ -inline void sum4(arr3_double vect1, double alfa, const arr3_double vect2, double 
beta, const arr3_double vect3, double gamma, const arr3_double vect4, double delta, const arr3_double vect5, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = alfa * (vect2.get(i,j,k) + beta * vect3.get(i,j,k) + gamma * vect4.get(i,j,k) + delta * vect5.get(i,j,k)); - -} +void sum4(arr3_double vect1, double alfa, const arr3_double vect2, double beta, const arr3_double vect3, double gamma, const arr3_double vect4, double delta, const arr3_double vect5, int nx, int ny, int nz); /** method to calculate the scalar-vector product */ -inline void scale(double *vect, double alfa, int n) { - for (register int i = 0; i < n; i++) - vect[i] *= alfa; -} - +void scale(double *vect, double alfa, int n); /** method to calculate the scalar-vector product */ -inline void scale(arr3_double vect, double alfa, int nx, int ny) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect.fetch(i,j,0) *= alfa; -} - - +void scale(arr3_double vect, double alfa, int nx, int ny); /** method to calculate the scalar-vector product */ -inline void scale(arr3_double vect, double alfa, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect.fetch(i,j,k) *= alfa; -} -/** method to calculate the scalar product */ -inline void scale(double vect[][2][2], double alfa, int nx, int ny, int nz) { - for (int i = 0; i < nx; i++) - for (int j = 0; j < ny; j++) - for (int k = 0; k < nz; k++) - vect[i][j][k] *= alfa; -} +void scale(arr3_double vect, double alfa, int nx, int ny, int nz); +///** method to calculate the scalar product */ +//inline void scale(double vect[][2][2], double alfa, int nx, int ny, int nz) { +// for (int i = 0; i < nx; i++) +// for (int j = 0; j < ny; j++) +// for (int k = 0; k < nz; k++) +// vect[i][j][k] *= alfa; +//} /** method to calculate the 
scalar-vector product */ -inline void scale(arr3_double vect1, const arr3_double vect2, double alfa, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = vect2.get(i,j,k) * alfa; -} - +void scale(arr3_double vect1, const arr3_double vect2, double alfa, int nx, int ny, int nz); /** method to calculate the scalar-vector product */ -inline void scale(arr3_double vect1, const arr3_double vect2, double alfa, int nx, int ny) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect1.fetch(i,j,0) = vect2.get(i,j,0) * alfa; -} - +void scale(arr3_double vect1, const arr3_double vect2, double alfa, int nx, int ny); /** method to calculate the scalar-vector product */ -inline void scale(double *vect1, double *vect2, double alfa, int n) { - for (register int i = 0; i < n; i++) - vect1[i] = vect2[i] * alfa; -} - +void scale(double *vect1, double *vect2, double alfa, int n); /** method to calculate vector1 = vector1 + alfa*vector2 */ -inline void addscale(double alfa, arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = vect1.get(i,j,k) + alfa * vect2.get(i,j,k); -} +void addscale(double alfa, arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz); /** add scale for weights */ -inline void addscale(double alfa, double vect1[][2][2], double vect2[][2][2], int nx, int ny, int nz) { - for (int i = 0; i < nx; i++) - for (int j = 0; j < ny; j++) - for (int k = 0; k < nz; k++) - vect1[i][j][k] = vect1[i][j][k] + alfa * vect2[i][j][k]; - -} +void addscale(double alfa, double vect1[][2][2], double vect2[][2][2], int nx, int ny, int nz); /** method to calculate vector1 = vector1 + alfa*vector2 */ -inline void addscale(double alfa, arr3_double vect1, const arr3_double vect2, 
int nx, int ny) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect1.fetch(i,j,0) += alfa * vect2.get(i,j,0); -} +void addscale(double alfa, arr3_double vect1, const arr3_double vect2, int nx, int ny); /** method to calculate vector1 = vector1 + alfa*vector2 */ -inline void addscale(double alfa, double *vect1, double *vect2, int n) { - for (register int i = 0; i < n; i++) - vect1[i] += alfa * vect2[i]; - -} +void addscale(double alfa, double *vect1, double *vect2, int n); /** method to calculate vector1 = beta*vector1 + alfa*vector2 */ -inline void addscale(double alfa, double beta, double *vect1, double *vect2, int n) { - for (register int i = 0; i < n; i++) - vect1[i] = vect1[i] * beta + alfa * vect2[i]; - -} +void addscale(double alfa, double beta, double *vect1, double *vect2, int n); /** method to calculate vector1 = beta*vector1 + alfa*vector2 */ -inline void addscale(double alfa, double beta, arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { - - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) { - vect1.fetch(i,j,k) = beta * vect1.get(i,j,k) + alfa * vect2.get(i,j,k); - } - -} +void addscale(double alfa, double beta, arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz); /** method to calculate vector1 = beta*vector1 + alfa*vector2 */ -inline void addscale(double alfa, double beta, arr3_double vect1, const arr3_double vect2, int nx, int ny) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect1.fetch(i,j,0) = beta * vect1.get(i,j,0) + alfa * vect2.get(i,j,0); - -} - - +void addscale(double alfa, double beta, arr3_double vect1, const arr3_double vect2, int nx, int ny); /** method to calculate vector1 = alfa*vector2 + beta*vector3 */ -inline void scaleandsum(arr3_double vect1, double alfa, double beta, const arr3_double vect2, const arr3_double vect3, int nx, int ny, int nz) { - for 
(register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = alfa * vect2.get(i,j,k) + beta * vect3.get(i,j,k); -} +void scaleandsum(arr3_double vect1, double alfa, double beta, const arr3_double vect2, const arr3_double vect3, int nx, int ny, int nz); /** method to calculate vector1 = alfa*vector2 + beta*vector3 with vector2 depending on species*/ -inline void scaleandsum(arr3_double vect1, double alfa, double beta, const arr4_double vect2, const arr3_double vect3, int ns, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = alfa * vect2.get(ns,i,j,k) + beta * vect3.get(i,j,k); -} +void scaleandsum(arr3_double vect1, double alfa, double beta, const arr4_double vect2, const arr3_double vect3, int ns, int nx, int ny, int nz); /** method to calculate vector1 = alfa*vector2*vector3 with vector2 depending on species*/ -inline void prod(arr3_double vect1, double alfa, const arr4_double vect2, int ns, const arr3_double vect3, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = alfa * vect2.get(ns,i,j,k) * vect3.get(i,j,k); - -} +void prod(arr3_double vect1, double alfa, const arr4_double vect2, int ns, const arr3_double vect3, int nx, int ny, int nz); /** method to calculate vect1 = vect2/alfa */ -inline void div(arr3_double vect1, double alfa, const arr3_double vect2, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = vect2.get(i,j,k) / alfa; - -} -inline void prod6(arr3_double vect1, const arr3_double vect2, const arr3_double vect3, const arr3_double vect4, const arr3_double vect5, const arr3_double vect6, const arr3_double vect7, int nx, int ny, int 
nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = vect2.get(i,j,k) * vect3.get(i,j,k) + vect4.get(i,j,k) * vect5.get(i,j,k) + vect6.get(i,j,k) * vect7.get(i,j,k); -} +void div(arr3_double vect1, double alfa, const arr3_double vect2, int nx, int ny, int nz); +void prod6(arr3_double vect1, const arr3_double vect2, const arr3_double vect3, const arr3_double vect4, const arr3_double vect5, const arr3_double vect6, const arr3_double vect7, int nx, int ny, int nz); /** method used for calculating PI */ -inline void proddiv(arr3_double vect1, const arr3_double vect2, double alfa, const arr3_double vect3, const arr3_double vect4, const arr3_double vect5, const arr3_double vect6, double beta, const arr3_double vect7, const arr3_double vect8, double gamma, const arr3_double vect9, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = (vect2.get(i,j,k) + alfa * (vect3.get(i,j,k) * vect4.get(i,j,k) - vect5.get(i,j,k) * vect6.get(i,j,k)) + beta * vect7.get(i,j,k) * vect8.get(i,j,k)) / (1 + gamma * vect9.get(i,j,k)); - - // questo mi convince veramente poco!!!!!!!!!!!!!! CAZZO!!!!!!!!!!!!!!!!!! 
- // ***vect1++ = (***vect2++ + alfa*((***vect3++)*(***vect4++) - (***vect5++)*(***vect6++)) + beta*(***vect7++)*(***vect8++))/(1+gamma*(***vect9++)); -} +void proddiv(arr3_double vect1, const arr3_double vect2, double alfa, const arr3_double vect3, const arr3_double vect4, const arr3_double vect5, const arr3_double vect6, double beta, const arr3_double vect7, const arr3_double vect8, double gamma, const arr3_double vect9, int nx, int ny, int nz); /** method to calculate the opposite of a vector */ -inline void neg(arr3_double vect, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect.fetch(i,j,k) = -vect.get(i,j,k); -} - +void neg(arr3_double vect, int nx, int ny, int nz); /** method to calculate the opposite of a vector */ -inline void neg(arr3_double vect, int nx, int ny) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect.fetch(i,j,0) = -vect.get(i,j,0); -} +void neg(arr3_double vect, int nx, int ny); /** method to calculate the opposite of a vector */ -inline void neg(arr3_double vect, int nx) { - for (register int i = 0; i < nx; i++) - vect.fetch(i,0,0) = -vect.get(i,0,0); -} +void neg(arr3_double vect, int nx); /** method to calculate the opposite of a vector */ -inline void neg(double *vect, int n) { - for (register int i = 0; i < n; i++) - vect[i] = -vect[i]; - - -} +void neg(double *vect, int n); /** method to set equal two vectors */ -inline void eq(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(i,j,k) = vect2.get(i,j,k); - -} +void eq(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz); /** method to set equal two vectors */ -inline void eq(arr3_double vect1, const arr3_double vect2, int nx, int ny) { - for (register int i = 0; i < nx; i++) - for 
(register int j = 0; j < ny; j++) - vect1.fetch(i,j,0) = vect2.get(i,j,0); - -} - +void eq(arr3_double vect1, const arr3_double vect2, int nx, int ny); /** method to set equal two vectors */ -inline void eq(arr4_double vect1, const arr3_double vect2, int nx, int ny, int is) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect1.fetch(is,i,j,0) = vect2.get(i,j,0); - -} +void eq(arr4_double vect1, const arr3_double vect2, int nx, int ny, int is); /** method to set equal two vectors */ -inline void eq(arr4_double vect1, const arr3_double vect2, int nx, int ny, int nz, int is) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect1.fetch(is,i,j,k) = vect2.get(i,j,k); - -} - -inline void eq(double *vect1, double *vect2, int n) { +void eq(arr4_double vect1, const arr3_double vect2, int nx, int ny, int nz, int is); +inline void eq(double *vect1, double *vect2, int n){ for (register int i = 0; i < n; i++) vect1[i] = vect2[i]; } /** method to set a vector to a Value */ -inline void eqValue(double value, arr3_double vect, int nx, int ny, int nz) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - for (register int k = 0; k < nz; k++) - vect.fetch(i,j,k) = value; - -} -inline void eqValue(double value, double vect[][2][2], int nx, int ny, int nz) { - for (int i = 0; i < nx; i++) - for (int j = 0; j < ny; j++) - for (int k = 0; k < nz; k++) - vect[i][j][k] = value; - -} +void eqValue(double value, arr3_double vect, int nx, int ny, int nz); +//void eqValue(double value, double vect[][2][2], int nx, int ny, int nz); /** method to set a vector to a Value */ -inline void eqValue(double value, arr3_double vect, int nx, int ny) { - for (register int i = 0; i < nx; i++) - for (register int j = 0; j < ny; j++) - vect.fetch(i,j,0) = value; - -} +void eqValue(double value, arr3_double vect, int nx, int ny); /** method to set a vector to a Value */ 
-inline void eqValue(double value, arr3_double vect, int nx) { - for (register int i = 0; i < nx; i++) - vect.fetch(i,0,0) = value; - -} +void eqValue(double value, arr3_double vect, int nx); /** method to set a vector to a Value */ -inline void eqValue(double value, double *vect, int n) { - for (register int i = 0; i < n; i++) - vect[i] = value; -} +void eqValue(double value, double *vect, int n); /** method to put a column in a matrix 2D */ -inline void putColumn(double **Matrix, double *vect, int column, int n) { - for (int i = 0; i < n; i++) - Matrix[i][column] = vect[i]; - -} +void putColumn(double **Matrix, double *vect, int column, int n); /** method to get a column in a matrix 2D */ -inline void getColumn(double *vect, double **Matrix, int column, int n) { - for (int i = 0; i < n; i++) - vect[i] = Matrix[i][column]; -} +void getColumn(double *vect, double **Matrix, int column, int n); + +/** method to get rid of the ghost cells */ +inline void getRidGhost(double **out, double **in, int nx, int ny); + /** RIFAI QUESTA PARTE questo e' la tomba delle performance*/ inline void MODULO(double *x, double L) { *x = *x - floor(*x / L) * L; @@ -456,12 +167,6 @@ inline double eps() { eps = num * 2; return (eps); } -/** method to get rid of the ghost cells */ -inline void getRidGhost(double **out, double **in, int nx, int ny) { - for (register int i = 1; i < nx - 1; i++) - for (register int j = 1; j < ny - 1; j++) - out[i - 1][j - 1] = in[i][j]; -} /** method to calculate cross product of two vectors C= A x B */ inline void cross_product(double a1, double a2, double a3, double b1, double b2, double b3, double *c){ c[0] = a2 * b3 - a3 * b2; @@ -469,129 +174,8 @@ inline void cross_product(double a1, double a2, double a3, double b1, double b2, c[2] = a1 * b2 - a2 * b1; } -inline void loopX(double *b, double z, double x, double y, double a, double zc, double xc, double yc, double m){ - - double r = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)+(z-zc)*(z-zc)); - double theta = 
acos((z-zc+1e-10)/(r+1e-10)); - double phi = atan2(y-yc,x-xc); - //double Rho = r * sin(theta); - double Rho = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)); - - double Alpha = Rho/a; - double Beta = (z-zc)/a; - double Gamma = (z-zc+1e-10)/(Rho+1e-10); - - double Q = ((1 + Alpha)*(1 + Alpha) + Beta*Beta); - double k = sqrt(4*Alpha/Q); - double B0 = m / (2*a); //m * (C_LIGHT * MU0)/(2*a*a*a*M_PI); - - int err = 0; - - double Bz = B0*(EllipticE(k,err)*(1-Alpha*Alpha-Beta*Beta)/(Q-4*Alpha)+EllipticF(k,err))/(M_PI*sqrt(Q)); - double BRho = B0*Gamma*(EllipticE(k,err)*(1+Alpha*Alpha+Beta*Beta)/(Q-4*Alpha)-EllipticF(k,err))/(M_PI*sqrt(Q)); - - if (err) - cout << "Err came back :" << err << endl; - - if ( isnan(BRho) ) - BRho = 0; - if ( isnan(Bz) ) - Bz = 0; - - double Bx = BRho * cos(phi); - double By = BRho * sin(phi); - - //for debugging - /*cout << "\n\nAt (" << x << "," << y << "," << z << "), the field is :" << endl; - cout << "Bx: " << Bx << " T" << endl; - cout << "By: " << By << " T" << endl; - cout << "Bz: " << Bz << " T" << endl; - cout << "BRho: " << BRho << " T" << endl;*/ - - b[1] = Bx; - b[2] = By; - b[0] = Bz; -} - -inline void loopY(double *b, double y, double z, double x, double a, double yc, double zc, double xc, double m){ - - double r = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)+(z-zc)*(z-zc)); - double theta = acos((z-zc+1e-10)/(r+1e-10)); - double phi = atan2(y-yc,x-xc); - //double Rho = r * sin(theta); - double Rho = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)); - - double Alpha = Rho/a; - double Beta = (z-zc)/a; - double Gamma = (z-zc+1e-10)/(Rho+1e-10); - - double Q = ((1 + Alpha)*(1 + Alpha) + Beta*Beta); - double k = sqrt(4*Alpha/Q); - double B0 = m / (2*a); //m * (C_LIGHT * MU0)/(2*a*a*a*M_PI); - - int err = 0; - - double Bz = B0*(EllipticE(k,err)*(1-Alpha*Alpha-Beta*Beta)/(Q-4*Alpha)+EllipticF(k,err))/(M_PI*sqrt(Q)); - double BRho = B0*Gamma*(EllipticE(k,err)*(1+Alpha*Alpha+Beta*Beta)/(Q-4*Alpha)-EllipticF(k,err))/(M_PI*sqrt(Q)); - - if (err) - cout << "Err came back :" << 
err << endl; - - if ( isnan(BRho) ) - BRho = 0; - if ( isnan(Bz) ) - Bz = 0; - - double Bx = BRho * cos(phi); - double By = BRho * sin(phi); - - //for debugging - /*cout << "\n\nAt (" << x << "," << y << "," << z << "), the field is :" << endl; - cout << "Bx: " << Bx << " T" << endl; - cout << "By: " << By << " T" << endl; - cout << "Bz: " << Bz << " T" << endl; - cout << "BRho: " << BRho << " T" << endl;*/ - - b[2] = Bx; - b[0] = By; - b[1] = Bz; -} - -inline void loopZ(double *b, double x, double y, double z, double a, double xc, double yc, double zc, double m){ - - double r = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)+(z-zc)*(z-zc)); - double theta = acos((z-zc+1e-10)/(r+1e-10)); - double phi = atan2(y-yc,x-xc); - - double Rho = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)); - - double Alpha = Rho/a; - double Beta = (z-zc)/a; - double Gamma = (z-zc+1e-10)/(Rho+1e-10); - - double Q = ((1 + Alpha)*(1 + Alpha) + Beta*Beta); - double k = sqrt(4*Alpha/Q); - double B0 = m / (2*a); //m * (C_LIGHT * MU0)/(2*a*a*a*M_PI); - - int err = 0; - - double Bz = B0*(EllipticE(k,err)*(1-Alpha*Alpha-Beta*Beta)/(Q-4*Alpha)+EllipticF(k,err))/(M_PI*sqrt(Q)); - double BRho = B0*Gamma*(EllipticE(k,err)*(1+Alpha*Alpha+Beta*Beta)/(Q-4*Alpha)-EllipticF(k,err))/(M_PI*sqrt(Q)); - - if (err) - cout << "Err came back :" << err << endl; - - if ( isnan(BRho) ) - BRho = 0; - if ( isnan(Bz) ) - Bz = 0; - - double Bx = BRho * cos(phi); - double By = BRho * sin(phi); - - b[0] = Bx; - b[1] = By; - b[2] = Bz; -} +void loopX(double *b, double z, double x, double y, double a, double zc, double xc, double yc, double m); +void loopY(double *b, double y, double z, double x, double a, double yc, double zc, double xc, double m); +void loopZ(double *b, double x, double y, double z, double a, double xc, double yc, double zc, double m); #endif diff --git a/utility/Basic.cpp b/utility/Basic.cpp new file mode 100644 index 00000000..7725b680 --- /dev/null +++ b/utility/Basic.cpp @@ -0,0 +1,525 @@ +#include "mpi.h" +#include "Basic.h" + 
+#include + +#include "MPIdata.h" +#include "EllipticF.h" +#include "Alloc.h" + +/** method to calculate the parallel dot product with vect1, vect2 having the ghost cells*/ +double dotP(double *vect1, double *vect2, int n) { + double result = 0; + double local_result = 0; + for (register int i = 0; i < n; i++) + local_result += vect1[i] * vect2[i]; + MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + return (result); + +} +/** method to calculate dot product */ +double dot(double *vect1, double *vect2, int n) { + double result = 0; + for (int i = 0; i < n; i++) + result += vect1[i] * vect2[i]; + return (result); +} +/** method to calculate the square norm of a vector */ +double norm2(double **vect, int nx, int ny) { + double result = 0; + for (int i = 0; i < nx; i++) + for (int j = 0; j < ny; j++) + result += vect[i][j] * vect[i][j]; + return (result); +} +/** method to calculate the square norm of a vector */ +double norm2(const arr3_double vect, int nx, int ny) { + double result = 0; + for (int i = 0; i < nx; i++) + for (int j = 0; j < ny; j++) + result += vect.get(i,j,0) * vect.get(i,j,0); + return (result); +} +/** method to calculate the square norm of a vector */ +double norm2(double *vect, int nx) { + double result = 0; + for (int i = 0; i < nx; i++) + result += vect[i] * vect[i]; + return (result); +} + + + +/** method to calculate the parallel dot product */ +double norm2P(const arr3_double vect, int nx, int ny, int nz) { + double result = 0; + double local_result = 0; + for (int i = 0; i < nx; i++) + for (int j = 0; j < ny; j++) + for (int k = 0; k < nz; k++) + local_result += vect.get(i,j,k) * vect.get(i,j,k); + + MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + return (result); +} +/** method to calculate the parallel norm of a vector on different processors with the ghost cell */ +double norm2P(double *vect, int n) { + double result = 0; + double local_result = 0; + for (int i = 0; i < n; 
i++) + local_result += vect[i] * vect[i]; + MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + return (result); +} +/** method to calculate the parallel norm of a vector on different processors with the gost cell*/ +double normP(double *vect, int n) { + double result = 0.0; + double local_result = 0.0; + for (register int i = 0; i < n; i++) + local_result += vect[i] * vect[i]; + + + MPI_Allreduce(&local_result, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + return (sqrt(result)); + +} +/** method to calculate the difference of two vectors*/ +void sub(double *res, double *vect1, double *vect2, int n) { + for (register int i = 0; i < n; i++) + res[i] = vect1[i] - vect2[i]; +} +/** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ +void sum(double *vect1, double *vect2, int n) { + for (register int i = 0; i < n; i++) + vect1[i] += vect2[i]; + + +} +/** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ +void sum(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) += vect2.get(i,j,k); +} + +/** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ +void sum(arr3_double vect1, const arr3_double vect2, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect1.fetch(i,j,0) += vect2.get(i,j,0); +} + +/** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ +void sum(arr3_double vect1, const arr4_double vect2, int nx, int ny, int nz, int ns) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) += vect2.get(ns,i,j,k); +} + +/** method to calculate the sum of two vectors vector1 = vector1 + vector2*/ +void sum(arr3_double vect1, const arr4_double vect2, int nx, int 
ny, int ns) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect1.fetch(i,j,0) += vect2.get(ns,i,j,0); +} +/** method to calculate the subtraction of two vectors vector1 = vector1 - vector2*/ +void sub(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) -= vect2.get(i,j,k); +} + +/** method to calculate the subtraction of two vectors vector1 = vector1 - vector2*/ +void sub(arr3_double vect1, const arr3_double vect2, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect1.fetch(i,j,0) -= vect2.get(i,j,0); +} + + +/** method to sum 4 vectors vector1 = alfa*vector1 + beta*vector2 + gamma*vector3 + delta*vector4 */ +void sum4(arr3_double vect1, double alfa, const arr3_double vect2, double beta, const arr3_double vect3, double gamma, const arr3_double vect4, double delta, const arr3_double vect5, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = alfa * (vect2.get(i,j,k) + beta * vect3.get(i,j,k) + gamma * vect4.get(i,j,k) + delta * vect5.get(i,j,k)); + +} +/** method to calculate the scalar-vector product */ +void scale(double *vect, double alfa, int n) { + for (register int i = 0; i < n; i++) + vect[i] *= alfa; +} + +/** method to calculate the scalar-vector product */ +void scale(arr3_double vect, double alfa, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect.fetch(i,j,0) *= alfa; +} + + +/** method to calculate the scalar-vector product */ +void scale(arr3_double vect, double alfa, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect.fetch(i,j,k) *= alfa; +} 
+/** method to calculate the scalar-vector product */ +void scale(arr3_double vect1, const arr3_double vect2, double alfa, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = vect2.get(i,j,k) * alfa; +} + +/** method to calculate the scalar-vector product */ +void scale(arr3_double vect1, const arr3_double vect2, double alfa, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect1.fetch(i,j,0) = vect2.get(i,j,0) * alfa; +} + +/** method to calculate the scalar-vector product */ +void scale(double *vect1, double *vect2, double alfa, int n) { + for (register int i = 0; i < n; i++) + vect1[i] = vect2[i] * alfa; +} + +/** method to calculate vector1 = vector1 + alfa*vector2 */ +void addscale(double alfa, arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = vect1.get(i,j,k) + alfa * vect2.get(i,j,k); +} +/** add scale for weights */ +void addscale(double alfa, double vect1[][2][2], double vect2[][2][2], int nx, int ny, int nz) { + for (int i = 0; i < nx; i++) + for (int j = 0; j < ny; j++) + for (int k = 0; k < nz; k++) + vect1[i][j][k] = vect1[i][j][k] + alfa * vect2[i][j][k]; + +} +/** method to calculate vector1 = vector1 + alfa*vector2 */ +void addscale(double alfa, arr3_double vect1, const arr3_double vect2, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect1.fetch(i,j,0) += alfa * vect2.get(i,j,0); +} +/** method to calculate vector1 = vector1 + alfa*vector2 */ +void addscale(double alfa, double *vect1, double *vect2, int n) { + for (register int i = 0; i < n; i++) + vect1[i] += alfa * vect2[i]; + +} +/** method to calculate vector1 = beta*vector1 + alfa*vector2 */ +void addscale(double 
alfa, double beta, double *vect1, double *vect2, int n) { + for (register int i = 0; i < n; i++) + vect1[i] = vect1[i] * beta + alfa * vect2[i]; + +} +/** method to calculate vector1 = beta*vector1 + alfa*vector2 */ +void addscale(double alfa, double beta, arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { + + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) { + vect1.fetch(i,j,k) = beta * vect1.get(i,j,k) + alfa * vect2.get(i,j,k); + } + +} +/** method to calculate vector1 = beta*vector1 + alfa*vector2 */ +void addscale(double alfa, double beta, arr3_double vect1, const arr3_double vect2, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect1.fetch(i,j,0) = beta * vect1.get(i,j,0) + alfa * vect2.get(i,j,0); + +} + + +/** method to calculate vector1 = alfa*vector2 + beta*vector3 */ +void scaleandsum(arr3_double vect1, double alfa, double beta, const arr3_double vect2, const arr3_double vect3, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = alfa * vect2.get(i,j,k) + beta * vect3.get(i,j,k); +} +/** method to calculate vector1 = alfa*vector2 + beta*vector3 with vector2 depending on species*/ +void scaleandsum(arr3_double vect1, double alfa, double beta, const arr4_double vect2, const arr3_double vect3, int ns, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = alfa * vect2.get(ns,i,j,k) + beta * vect3.get(i,j,k); +} +/** method to calculate vector1 = alfa*vector2*vector3 with vector2 depending on species*/ +void prod(arr3_double vect1, double alfa, const arr4_double vect2, int ns, const arr3_double vect3, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; 
j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = alfa * vect2.get(ns,i,j,k) * vect3.get(i,j,k); + +} +/** method to calculate vect1 = vect2/alfa */ +void div(arr3_double vect1, double alfa, const arr3_double vect2, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = vect2.get(i,j,k) / alfa; + +} +void prod6(arr3_double vect1, const arr3_double vect2, const arr3_double vect3, const arr3_double vect4, const arr3_double vect5, const arr3_double vect6, const arr3_double vect7, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = vect2.get(i,j,k) * vect3.get(i,j,k) + vect4.get(i,j,k) * vect5.get(i,j,k) + vect6.get(i,j,k) * vect7.get(i,j,k); +} +/** method used for calculating PI */ +void proddiv(arr3_double vect1, const arr3_double vect2, double alfa, const arr3_double vect3, const arr3_double vect4, const arr3_double vect5, const arr3_double vect6, double beta, const arr3_double vect7, const arr3_double vect8, double gamma, const arr3_double vect9, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = (vect2.get(i,j,k) + alfa * (vect3.get(i,j,k) * vect4.get(i,j,k) - vect5.get(i,j,k) * vect6.get(i,j,k)) + beta * vect7.get(i,j,k) * vect8.get(i,j,k)) / (1 + gamma * vect9.get(i,j,k)); + + // questo mi convince veramente poco!!!!!!!!!!!!!! CAZZO!!!!!!!!!!!!!!!!!! 
+ // ***vect1++ = (***vect2++ + alfa*((***vect3++)*(***vect4++) - (***vect5++)*(***vect6++)) + beta*(***vect7++)*(***vect8++))/(1+gamma*(***vect9++)); +} +/** method to calculate the opposite of a vector */ +void neg(arr3_double vect, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect.fetch(i,j,k) = -vect.get(i,j,k); +} + +/** method to calculate the opposite of a vector */ +void neg(arr3_double vect, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect.fetch(i,j,0) = -vect.get(i,j,0); +} +/** method to calculate the opposite of a vector */ +void neg(arr3_double vect, int nx) { + for (register int i = 0; i < nx; i++) + vect.fetch(i,0,0) = -vect.get(i,0,0); +} +/** method to calculate the opposite of a vector */ +void neg(double *vect, int n) { + for (register int i = 0; i < n; i++) + vect[i] = -vect[i]; + + +} +/** method to set equal two vectors */ +void eq(arr3_double vect1, const arr3_double vect2, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect1.fetch(i,j,k) = vect2.get(i,j,k); + +} +/** method to set equal two vectors */ +void eq(arr3_double vect1, const arr3_double vect2, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect1.fetch(i,j,0) = vect2.get(i,j,0); + +} + +/** method to set equal two vectors */ +void eq(arr4_double vect1, const arr3_double vect2, int nx, int ny, int is) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect1.fetch(is,i,j,0) = vect2.get(i,j,0); + +} +/** method to set equal two vectors */ +void eq(arr4_double vect1, const arr3_double vect2, int nx, int ny, int nz, int is) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + 
vect1.fetch(is,i,j,k) = vect2.get(i,j,k); + +} + +/** method to set a vector to a Value */ +void eqValue(double value, arr3_double vect, int nx, int ny, int nz) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + for (register int k = 0; k < nz; k++) + vect.fetch(i,j,k) = value; + +} +//void eqValue(double value, double vect[][2][2], int nx, int ny, int nz) { +// for (int i = 0; i < nx; i++) +// for (int j = 0; j < ny; j++) +// for (int k = 0; k < nz; k++) +// vect[i][j][k] = value; +// +//} +/** method to set a vector to a Value */ +void eqValue(double value, arr3_double vect, int nx, int ny) { + for (register int i = 0; i < nx; i++) + for (register int j = 0; j < ny; j++) + vect.fetch(i,j,0) = value; + +} +/** method to set a vector to a Value */ +void eqValue(double value, arr3_double vect, int nx) { + for (register int i = 0; i < nx; i++) + vect.fetch(i,0,0) = value; + +} +/** method to set a vector to a Value */ +void eqValue(double value, double *vect, int n) { + for (register int i = 0; i < n; i++) + vect[i] = value; +} +/** method to put a column in a matrix 2D */ +void putColumn(double **Matrix, double *vect, int column, int n) { + for (int i = 0; i < n; i++) + Matrix[i][column] = vect[i]; + +} +/** method to get a column in a matrix 2D */ +void getColumn(double *vect, double **Matrix, int column, int n) { + for (int i = 0; i < n; i++) + vect[i] = Matrix[i][column]; +} +/** method to get rid of the ghost cells */ +void getRidGhost(double **out, double **in, int nx, int ny) { + for (register int i = 1; i < nx - 1; i++) + for (register int j = 1; j < ny - 1; j++) + out[i - 1][j - 1] = in[i][j]; +} + +void loopX(double *b, double z, double x, double y, double a, double zc, double xc, double yc, double m){ + + double r = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)+(z-zc)*(z-zc)); + double theta = acos((z-zc+1e-10)/(r+1e-10)); + double phi = atan2(y-yc,x-xc); + //double Rho = r * sin(theta); + double Rho = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)); 
+ + double Alpha = Rho/a; + double Beta = (z-zc)/a; + double Gamma = (z-zc+1e-10)/(Rho+1e-10); + + double Q = ((1 + Alpha)*(1 + Alpha) + Beta*Beta); + double k = sqrt(4*Alpha/Q); + double B0 = m / (2*a); //m * (C_LIGHT * MU0)/(2*a*a*a*M_PI); + + int err = 0; + + double Bz = B0*(EllipticE(k,err)*(1-Alpha*Alpha-Beta*Beta)/(Q-4*Alpha)+EllipticF(k,err))/(M_PI*sqrt(Q)); + double BRho = B0*Gamma*(EllipticE(k,err)*(1+Alpha*Alpha+Beta*Beta)/(Q-4*Alpha)-EllipticF(k,err))/(M_PI*sqrt(Q)); + + if (err) + eprintf("Err came back :%d", err); + + if ( isnan(BRho) ) + BRho = 0; + if ( isnan(Bz) ) + Bz = 0; + + double Bx = BRho * cos(phi); + double By = BRho * sin(phi); + + //for debugging + /*cout << "\n\nAt (" << x << "," << y << "," << z << "), the field is :" << endl; + cout << "Bx: " << Bx << " T" << endl; + cout << "By: " << By << " T" << endl; + cout << "Bz: " << Bz << " T" << endl; + cout << "BRho: " << BRho << " T" << endl;*/ + + b[1] = Bx; + b[2] = By; + b[0] = Bz; +} + +void loopY(double *b, double y, double z, double x, double a, double yc, double zc, double xc, double m){ + + double r = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)+(z-zc)*(z-zc)); + double theta = acos((z-zc+1e-10)/(r+1e-10)); + double phi = atan2(y-yc,x-xc); + //double Rho = r * sin(theta); + double Rho = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)); + + double Alpha = Rho/a; + double Beta = (z-zc)/a; + double Gamma = (z-zc+1e-10)/(Rho+1e-10); + + double Q = ((1 + Alpha)*(1 + Alpha) + Beta*Beta); + double k = sqrt(4*Alpha/Q); + double B0 = m / (2*a); //m * (C_LIGHT * MU0)/(2*a*a*a*M_PI); + + int err = 0; + + double Bz = B0*(EllipticE(k,err)*(1-Alpha*Alpha-Beta*Beta)/(Q-4*Alpha)+EllipticF(k,err))/(M_PI*sqrt(Q)); + double BRho = B0*Gamma*(EllipticE(k,err)*(1+Alpha*Alpha+Beta*Beta)/(Q-4*Alpha)-EllipticF(k,err))/(M_PI*sqrt(Q)); + + if (err) + eprintf("Err came back :%d", err); + + if ( isnan(BRho) ) + BRho = 0; + if ( isnan(Bz) ) + Bz = 0; + + double Bx = BRho * cos(phi); + double By = BRho * sin(phi); + + //for debugging + 
/*cout << "\n\nAt (" << x << "," << y << "," << z << "), the field is :" << endl; + cout << "Bx: " << Bx << " T" << endl; + cout << "By: " << By << " T" << endl; + cout << "Bz: " << Bz << " T" << endl; + cout << "BRho: " << BRho << " T" << endl;*/ + + b[2] = Bx; + b[0] = By; + b[1] = Bz; +} + +void loopZ(double *b, double x, double y, double z, double a, double xc, double yc, double zc, double m){ + + double r = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)+(z-zc)*(z-zc)); + double theta = acos((z-zc+1e-10)/(r+1e-10)); + double phi = atan2(y-yc,x-xc); + + double Rho = sqrt((x-xc)*(x-xc)+(y-yc)*(y-yc)); + + double Alpha = Rho/a; + double Beta = (z-zc)/a; + double Gamma = (z-zc+1e-10)/(Rho+1e-10); + + double Q = ((1 + Alpha)*(1 + Alpha) + Beta*Beta); + double k = sqrt(4*Alpha/Q); + double B0 = m / (2*a); //m * (C_LIGHT * MU0)/(2*a*a*a*M_PI); + + int err = 0; + + double Bz = B0*(EllipticE(k,err)*(1-Alpha*Alpha-Beta*Beta)/(Q-4*Alpha)+EllipticF(k,err))/(M_PI*sqrt(Q)); + double BRho = B0*Gamma*(EllipticE(k,err)*(1+Alpha*Alpha+Beta*Beta)/(Q-4*Alpha)-EllipticF(k,err))/(M_PI*sqrt(Q)); + + if (err) + eprintf("Err came back :%d", err); + + if ( isnan(BRho) ) + BRho = 0; + if ( isnan(Bz) ) + Bz = 0; + + double Bx = BRho * cos(phi); + double By = BRho * sin(phi); + + b[0] = Bx; + b[1] = By; + b[2] = Bz; +} + From d3ae3cfe777b4889dc75100aab49d7dbaa1c245b Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 14:51:08 +0100 Subject: [PATCH 102/118] inserted #include "errors.h" forgotten in commit fe2a01ab --- utility/Basic.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/utility/Basic.cpp b/utility/Basic.cpp index 7725b680..06ca236d 100644 --- a/utility/Basic.cpp +++ b/utility/Basic.cpp @@ -1,11 +1,9 @@ #include "mpi.h" +#include "ipicdefs.h" #include "Basic.h" - -#include - -#include "MPIdata.h" #include "EllipticF.h" #include "Alloc.h" +#include "errors.h" /** method to calculate the parallel dot product with vect1, vect2 having the ghost cells*/ double 
dotP(double *vect1, double *vect2, int n) { From fb3473f0088359f43415b28291073e74969cbeed Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 13:48:51 +0100 Subject: [PATCH 103/118] created former_MPI_Barrier macro --- include/ipicdefs.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/ipicdefs.h b/include/ipicdefs.h index 1031f60c..53b22cd8 100644 --- a/include/ipicdefs.h +++ b/include/ipicdefs.h @@ -9,6 +9,14 @@ // use precprocessor to remove MPI_Barrier() calls. #define MPI_Barrier(args...) +#define former_MPI_Barrier(args...) + +#define ipicMPI_Allreduce(args...) \ + { \ + static int count=0; \ + dprint(count++); \ + MPI_Allreduce(## args); \ + } //#define SINGLE_PRECISION_PCLS // From 091349abc43c8e791f3dfe4afcfbfc4c092ca40f Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 14:35:53 +0100 Subject: [PATCH 104/118] moved MPI_Allreduce calls from .h to .cpp --- communication/ComParticles3D.cpp | 18 ++++++++++++++++++ include/ComParticles3D.h | 17 ++--------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/communication/ComParticles3D.cpp b/communication/ComParticles3D.cpp index f74699ae..e78f0597 100644 --- a/communication/ComParticles3D.cpp +++ b/communication/ComParticles3D.cpp @@ -1,5 +1,6 @@ #include "ComParticles3D.h" +#include "ipicdefs.h" /** comunicate particles and receive particles to and from 6 processors */ void communicateParticles(int buffer_size, double *b_Xleft, double *b_Xright, double *b_Yleft, double *b_Yright, double *b_Zleft, double *b_Zright, VirtualTopology3D * vct) { @@ -10,3 +11,20 @@ void communicateParticles(int buffer_size, double *b_Xleft, double *b_Xright, do // DIR Z communicateParticlesDIR(buffer_size, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), b_Zright, b_Zleft); } + +/** communicate the number of particles are not in the right domain*/ +int reduceNumberParticles(int rightDomain) 
{ + int result = 0; + former_MPI_Barrier(MPI_COMM_WORLD); + MPI_Allreduce(&rightDomain, &result, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + return (result); +} + +/** communicate the maximum number of particles from a domain */ +int reduceMaxNpExiting(int npExitingMax) { + int result = 0; + former_MPI_Barrier(MPI_COMM_WORLD); + MPI_Allreduce(&npExitingMax, &result, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + return (result); +} + diff --git a/include/ComParticles3D.h b/include/ComParticles3D.h index 76090d45..44081df6 100644 --- a/include/ComParticles3D.h +++ b/include/ComParticles3D.h @@ -9,28 +9,15 @@ developers : Stefano Markidis, Giovanni Lapenta #ifndef ComParticles3D_H #define ComParticles3D_H -#include "MPIdata.h" -#include "ipicdefs.h" #include "ComBasic3D.h" /** comunicate particles and receive particles to and from 6 processors */ void communicateParticles(int buffer_size, double *b_Xleft, double *b_Xright, double *b_Yleft, double *b_Yright, double *b_Zleft, double *b_Zright, VirtualTopology3D * vct); /** communicate the number of particles are not in the right domain*/ -inline int reduceNumberParticles(int rightDomain) { - int result = 0; - MPI_Barrier(MPI_COMM_WORLD); - MPI_Allreduce(&rightDomain, &result, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - return (result); -} +int reduceNumberParticles(int rightDomain); /** communicate the maximum number of particles from a domain */ -inline int reduceMaxNpExiting(int npExitingMax) { - int result = 0; - MPI_Barrier(MPI_COMM_WORLD); - MPI_Allreduce(&npExitingMax, &result, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); - return (result); -} - +int reduceMaxNpExiting(int npExitingMax); #endif From b6639f26d056474ba34ce0dd78021b05ce6c539d Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 14:37:12 +0100 Subject: [PATCH 105/118] commented out unused declarations in MPIdata.h --- include/MPIdata.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/MPIdata.h b/include/MPIdata.h index 
c4d535ab..36d29bcf 100644 --- a/include/MPIdata.h +++ b/include/MPIdata.h @@ -44,7 +44,7 @@ class MPIdata { /** print MPI data structure */ void Print(void); /** MPI status during the communication */ - MPI_Status status; + //MPI_Status status; public: static int get_rank(){return instance().rank;} static int get_nprocs(){return instance().nprocs;} @@ -55,7 +55,7 @@ class MPIdata { static int nprocs; // evidently unused... - char *buffer; - int buffer_size; + //char *buffer; + //int buffer_size; }; #endif From 645af80aef4c6840edad8c2546de2783d98f179d Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 14:40:23 +0100 Subject: [PATCH 106/118] changed "ipic run" to call mpirun rather than mpiexec --- scripts/ipic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ipic.py b/scripts/ipic.py index f00b018c..3ae87d9e 100755 --- a/scripts/ipic.py +++ b/scripts/ipic.py @@ -68,7 +68,7 @@ def construct_run_command(args): output = 'data' inputfile = 'src/inputfiles/GEM.inp' hostname = '' - mpirun = 'mpiexec' + mpirun = 'mpirun' global system if system == 'xeon' or system == 'mic': if system == 'xeon': From 4bd2416cc3d3b08f79c3ba59e20dcbd93a234afb Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 14:54:34 +0100 Subject: [PATCH 107/118] added number of threads to utility/MPIdata.cpp output --- utility/MPIdata.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/utility/MPIdata.cpp b/utility/MPIdata.cpp index 6baa3697..d70a2b50 100644 --- a/utility/MPIdata.cpp +++ b/utility/MPIdata.cpp @@ -1,10 +1,7 @@ #include -#include #include #include "MPIdata.h" - -using std::cout; -using std::endl; +#include "ompdefs.h" // for omp_get_max_threads // code to check that init() is called before instance() // @@ -49,10 +46,13 @@ void MPIdata::finalize_mpi() { } void MPIdata::Print(void) { - cout << endl; - cout << "Number of processes = " << get_nprocs() << endl; - cout << "-------------------------" << endl; - 
cout << endl; + printf("\n" + "Number of processes = %d\n" + "-------------------------\n" + "Number of threads = %d\n" + "-------------------------\n", + get_nprocs(), + omp_get_max_threads()); } // extern MPIdata *mpi; // instantiated in iPIC3D.cpp From f500fad934ef339c23e3e5295debd3e5a27c6891 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 14:38:28 +0100 Subject: [PATCH 108/118] issue#62: changed getVelocityDistribution to make one MPI_Allreduce call (untested) --- particles/Particles3Dcomm.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/particles/Particles3Dcomm.cpp b/particles/Particles3Dcomm.cpp index 7183eb16..cb5f6688 100644 --- a/particles/Particles3Dcomm.cpp +++ b/particles/Particles3Dcomm.cpp @@ -981,13 +981,17 @@ long long *Particles3Dcomm::getVelocityDistribution(int nBins, double maxVel) { else f[bin] += 1; } - long long localN = 0; - long long totalN = 0; - for (int i = 0; i < nBins; i++) { - localN = f[i]; - MPI_Allreduce(&localN, &totalN, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); - f[i] = totalN; - } + MPI_Allreduce(MPI_IN_PLACE, f, nBins, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + // This way of summing is very inefficient + //{ + // long long localN = 0; + // long long totalN = 0; + // for (int i = 0; i < nBins; i++) { + // localN = f[i]; + // MPI_Allreduce(&localN, &totalN, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + // f[i] = totalN; + // } + //} return f; } From 699f6c02b62092d387dc9e9a5dba70b527c9c9fd Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 15:02:08 +0100 Subject: [PATCH 109/118] issue#35: replaced MPI_Barrier with former_MPI_Barrier; MPI_Barrier is no longer no-op --- communication/ComInterpNodes3D.cpp | 8 +-- communication/ComNodes3D.cpp | 83 ++++++++++++++++++------------ include/ipicdefs.h | 4 +- main/iPic3Dlib.cpp | 8 +-- particles/Particles3D.cpp | 8 +-- performances/Timing.cpp | 6 +-- 6 files changed, 66 insertions(+), 51 deletions(-) diff --git 
a/communication/ComInterpNodes3D.cpp b/communication/ComInterpNodes3D.cpp index d04e6765..91e3e0a7 100644 --- a/communication/ComInterpNodes3D.cpp +++ b/communication/ComInterpNodes3D.cpp @@ -48,19 +48,19 @@ void communicateInterp(int nx, int ny, int nz, int ns, double**** vector, int bc // communicate twice each direction // X-DIRECTION: Z -> X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor_P(), vct->getXleft_neighbor_P(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor_P(), vct->getXleft_neighbor_P(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor_P(), vct->getYleft_neighbor_P(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor_P(), vct->getYleft_neighbor_P(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); 
addEdgeZ(nx, ny, nz, vector, ns, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge, vct); addEdgeY(nx, ny, nz, vector, ns, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge, vct); addEdgeX(nx, ny, nz, vector, ns, ghostXsameYrightZrightEdge, ghostXsameYleftZleftEdge, ghostXsameYleftZrightEdge, ghostXsameYrightZleftEdge, vct); diff --git a/communication/ComNodes3D.cpp b/communication/ComNodes3D.cpp index 6b0bb169..b3224c77 100644 --- a/communication/ComNodes3D.cpp +++ b/communication/ComNodes3D.cpp @@ -4,10 +4,13 @@ #include "TimeTasks.h" #include "ipicdefs.h" #include "Alloc.h" +#include "debug.h" +#include "parallel.h" /** communicate ghost cells (FOR NODES) */ void communicateNode(int nx, int ny, int nz, arr3_double _vector, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -54,19 +57,19 @@ void communicateNode(int nx, int ny, int nz, arr3_double _vector, VirtualTopolog // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, 
ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); parseEdgeZ(nx, ny, nz, vector, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); @@ -110,6 +113,7 @@ void communicateNode(int nx, int ny, int nz, arr3_double _vector, VirtualTopolog /** communicate ghost cells (FOR NODES) */ void communicateNodeBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector = _vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -155,19 +159,19 @@ void communicateNodeBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXr // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), 
vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); parseEdgeZ(nx, ny, nz, vector, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); @@ -214,6 +218,7 @@ void communicateNodeBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXr /** communicate ghost cells (FOR NODES) with particles BC*/ void communicateNodeBC_P(int nx, int ny, int nz, arr3_double 
_vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -259,19 +264,19 @@ void communicateNodeBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFace // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor_P(), vct->getXleft_neighbor_P(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor_P(), vct->getXleft_neighbor_P(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor_P(), vct->getYleft_neighbor_P(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor_P(), vct->getYleft_neighbor_P(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 
2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); parseEdgeZ(nx, ny, nz, vector, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); @@ -319,6 +324,7 @@ void communicateNodeBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFace /** SPECIES: communicate ghost cells */ void communicateNode(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ****vector = _vector.fetch_arr4(); // allocate 6 ghost cell Faces @@ -366,19 +372,19 @@ void communicateNode(int nx, int ny, int nz, arr4_double _vector, int ns, Virtua // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), 
vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); parseEdgeZ(nx, ny, nz, vector, ns, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ns, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); parseEdgeX(nx, ny, nz, vector, ns, ghostXsameYrightZrightEdge, ghostXsameYleftZleftEdge, ghostXsameYleftZrightEdge, ghostXsameYrightZleftEdge); @@ -422,6 +428,7 @@ void communicateNode(int nx, int ny, int nz, arr4_double _vector, int ns, Virtua /** SPECIES: communicate ghost cells */ void communicateNode_P(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ****vector = _vector.fetch_arr4(); // allocate 6 ghost cell Faces @@ -469,19 +476,19 @@ void communicateNode_P(int nx, int ny, int nz, arr4_double _vector, int ns, Virt // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor_P(), vct->getXleft_neighbor_P(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); 
communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor_P(), vct->getXleft_neighbor_P(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor_P(), vct->getYleft_neighbor_P(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor_P(), vct->getYleft_neighbor_P(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); parseEdgeZ(nx, ny, nz, vector, ns, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ns, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); parseEdgeX(nx, ny, nz, vector, ns, ghostXsameYrightZrightEdge, ghostXsameYleftZleftEdge, ghostXsameYleftZrightEdge, ghostXsameYrightZleftEdge); @@ -525,6 +532,7 @@ void communicateNode_P(int nx, int ny, int nz, arr4_double _vector, int ns, Virt /** communicate ghost cells (FOR CENTERS) */ void communicateCenter(int nx, int ny, int nz, arr3_double _vector, 
VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector = _vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -571,19 +579,19 @@ void communicateCenter(int nx, int ny, int nz, arr3_double _vector, VirtualTopol // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); 
parseEdgeZ(nx, ny, nz, vector, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); parseEdgeX(nx, ny, nz, vector, ghostXsameYrightZrightEdge, ghostXsameYleftZleftEdge, ghostXsameYleftZrightEdge, ghostXsameYrightZleftEdge); @@ -625,6 +633,7 @@ void communicateCenter(int nx, int ny, int nz, arr3_double _vector, VirtualTopol /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -657,6 +666,7 @@ void communicateCenterBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, /** communicate ghost cells (FOR CENTERS) with BOX stencil*/ void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -691,6 +701,7 @@ void communicateCenterBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// 
static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -722,6 +733,7 @@ void communicateNodeBoxStencilBC(int nx, int ny, int nz, arr3_double _vector, in void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces double *ghostXrightFace = new double[(ny - 2) * (nz - 2)]; @@ -756,6 +768,7 @@ void communicateNodeBoxStencilBC_P(int nx, int ny, int nz, arr3_double _vector, /** SPECIES: communicate ghost cells */ void communicateCenter(int nx, int ny, int nz, arr4_double _vector, int ns, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ****vector=_vector.fetch_arr4(); // allocate 6 ghost cell Faces @@ -801,19 +814,19 @@ void communicateCenter(int nx, int ny, int nz, arr4_double _vector, int ns, Virt // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), 
vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); parseEdgeZ(nx, ny, nz, vector, ns, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ns, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); parseEdgeX(nx, ny, nz, vector, ns, ghostXsameYrightZrightEdge, ghostXsameYleftZleftEdge, ghostXsameYleftZrightEdge, ghostXsameYrightZleftEdge); @@ -855,6 +868,7 @@ void communicateCenter(int nx, int ny, int nz, arr4_double _vector, int ns, Virt // /////////// communication + BC //////////////////////////// void communicateCenterBC(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -900,18 +914,18 @@ void 
communicateCenterBC(int nx, int ny, int nz, arr3_double _vector, int bcFace // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor(), vct->getXleft_neighbor(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor(), vct->getYleft_neighbor(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor(), vct->getZleft_neighbor(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); parseEdgeZ(nx, ny, nz, vector, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); parseEdgeX(nx, ny, nz, vector, 
ghostXsameYrightZrightEdge, ghostXsameYleftZleftEdge, ghostXsameYleftZrightEdge, ghostXsameYrightZleftEdge); @@ -957,6 +971,7 @@ void communicateCenterBC(int nx, int ny, int nz, arr3_double _vector, int bcFace // /////////// communication + BC //////////////////////////// void communicateCenterBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFaceXright, int bcFaceXleft, int bcFaceYright, int bcFaceYleft, int bcFaceZright, int bcFaceZleft, VirtualTopology3D * vct) { timeTasks_set_communicating(); +// static int counter=0; if(is_output_thread()) { counter++; dprint(counter); } double ***vector=_vector.fetch_arr3(); // allocate 6 ghost cell Faces @@ -1002,18 +1017,18 @@ void communicateCenterBC_P(int nx, int ny, int nz, arr3_double _vector, int bcFa // communicate twice each direction // X-DIRECTION: Z -> X - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor_P(), vct->getXleft_neighbor_P(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZleftEdge, ghostXleftYsameZleftEdge); communicateGhostFace(ny - 2, vct->getCartesian_rank(), vct->getXright_neighbor_P(), vct->getXleft_neighbor_P(), 0, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYsameZrightEdge, ghostXleftYsameZrightEdge); // Y-DIRECTION: X -> Y - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor_P(), vct->getYleft_neighbor_P(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXleftYrightZsameEdge, ghostXleftYleftZsameEdge); communicateGhostFace(nz - 2, vct->getCartesian_rank(), vct->getYright_neighbor_P(), vct->getYleft_neighbor_P(), 1, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXrightYrightZsameEdge, ghostXrightYleftZsameEdge); // Z-DIRECTION: Y -> Z communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 2, 
vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYleftZrightEdge, ghostXsameYleftZleftEdge); communicateGhostFace(nx - 2, vct->getCartesian_rank(), vct->getZright_neighbor_P(), vct->getZleft_neighbor_P(), 2, vct->getXLEN(), vct->getYLEN(), vct->getZLEN(), ghostXsameYrightZrightEdge, ghostXsameYrightZleftEdge); // parse - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); parseEdgeZ(nx, ny, nz, vector, ghostXrightYrightZsameEdge, ghostXleftYleftZsameEdge, ghostXrightYleftZsameEdge, ghostXleftYrightZsameEdge); parseEdgeY(nx, ny, nz, vector, ghostXrightYsameZrightEdge, ghostXleftYsameZleftEdge, ghostXleftYsameZrightEdge, ghostXrightYsameZleftEdge); parseEdgeX(nx, ny, nz, vector, ghostXsameYrightZrightEdge, ghostXsameYleftZleftEdge, ghostXsameYleftZrightEdge, ghostXsameYrightZleftEdge); diff --git a/include/ipicdefs.h b/include/ipicdefs.h index 53b22cd8..765694d1 100644 --- a/include/ipicdefs.h +++ b/include/ipicdefs.h @@ -7,8 +7,8 @@ // uncomment the following line to use parallel hdf5 //#define USING_PARALLEL_HDF5 -// use precprocessor to remove MPI_Barrier() calls. -#define MPI_Barrier(args...) +// use precprocessor to remove former MPI_Barrier() calls. +//#define MPI_Barrier(args...) #define former_MPI_Barrier(args...) #define ipicMPI_Allreduce(args...) 
\ diff --git a/main/iPic3Dlib.cpp b/main/iPic3Dlib.cpp index 33f9a96c..401123bf 100644 --- a/main/iPic3Dlib.cpp +++ b/main/iPic3Dlib.cpp @@ -63,7 +63,7 @@ int c_Solver::Init(int argc, char **argv) { col->save(); } // Create the local grid - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); grid = new Grid3DCU(col, vct); // Create the local grid EMf = new EMfields3D(col, grid); // Create Electromagnetic Fields Object @@ -139,7 +139,7 @@ int c_Solver::Init(int argc, char **argv) { hdf5_agent.close(); } - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); Eenergy, Benergy, TOTenergy = 0.0, TOTmomentum = 0.0; Ke = new double[ns]; momentum = new double[ns]; @@ -242,11 +242,11 @@ void c_Solver::CalculateMoments() { EMf->ConstantChargeOpenBC(grid, vct); // Set a constant charge in the OpenBC boundaries - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); EMf->interpDensitiesN2C(vct, grid); // calculate densities on centers from nodes EMf->calculateHatFunctions(grid, vct); // calculate the hat quantities for the implicit method - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); } //! 
MAXWELL SOLVER for Efield diff --git a/particles/Particles3D.cpp b/particles/Particles3D.cpp index 5186a4d7..4d9e50bf 100644 --- a/particles/Particles3D.cpp +++ b/particles/Particles3D.cpp @@ -1088,14 +1088,14 @@ int Particles3D::communicate_particles(VirtualTopology3D * vct) const int avail = communicate(vct); if (avail < 0) return (-1); - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); // communicate again if particles are not in the correct domain while (isMessagingDone(vct) > 0) { // COMMUNICATION const int avail = communicate(vct); if (avail < 0) return (-1); - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); } return 0; // exit successfully } @@ -1463,7 +1463,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* avail = communicate(vct); if (avail < 0) return(-1); - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); // communicate again if particles are not in the correct domain while(isMessagingDone(vct) >0){ @@ -1471,7 +1471,7 @@ int Particles3D::particle_repopulator(Grid* grid,VirtualTopology3D* vct, Field* avail = communicate(vct); if (avail < 0) return(-1); - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); } return(0); // exit succcesfully (hopefully) diff --git a/performances/Timing.cpp b/performances/Timing.cpp index e639fd88..11a27364 100644 --- a/performances/Timing.cpp +++ b/performances/Timing.cpp @@ -35,7 +35,7 @@ Timing::Timing(int my_rank) { // MPE_Describe_state(event2a,event2b,"Field","blue"); // the mover is blue in the visualizer // MPE_Describe_state(event3a,event3b,"Interp P->G","yellow"); // the interpolation particle->Grid is yellow in the visualizer // } - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); // start the log // MPE_Start_log(); @@ -44,12 +44,12 @@ Timing::Timing(int my_rank) { /** start the timer */ void Timing::startTiming() { ttick = MPI_Wtick(); - MPI_Barrier(MPI_COMM_WORLD); + 
former_MPI_Barrier(MPI_COMM_WORLD); tstart = MPI_Wtime(); } /** stop the timer */ void Timing::stopTiming() { - MPI_Barrier(MPI_COMM_WORLD); + former_MPI_Barrier(MPI_COMM_WORLD); tend = MPI_Wtime(); texecution = tend - tstart; if (rank_id == 0) { From 88ee39ef0c1f2d4a60fbaed48c0459fec793438e Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 17:05:01 +0100 Subject: [PATCH 110/118] issue #63: made invalid_value_error thread safe --- utility/errors.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/utility/errors.cpp b/utility/errors.cpp index 9d5d66a9..4cca0143 100644 --- a/utility/errors.cpp +++ b/utility/errors.cpp @@ -6,8 +6,8 @@ #include #include #include +#include #include "errors.h" -//#include "MPIdata.h" // for rank /** implementation of declarations in errors.h **/ @@ -33,20 +33,24 @@ void fprintf_fileLine(FILE * fptr, const char *type, const char *func, const cha // abort(); //} -// This needs to be fixed to be thread-safe like -// eprintf_fileLine() below. Write the message to a string and -// then print it out as an atomic operation. +// lazy implementation using streams class // -#include using namespace std; #define implement_invalid_value_error(t1) \ void invalid_value_error_fileLine(const char* file, int line, const char* func, \ const char* type, const char* expr, t1 val) \ { \ - std::cerr<< "ERROR in file " << file << ", line " << line \ + /* To be thread-safe, write the message to a string and \ + * then print it out as an atomic operation. */ \ + std::stringstream ss; \ + ss << "(" << MPIdata::get_rank() << "." 
<< omp_get_thread_num() << ") " \ + << "ERROR in file " << file << ", line " << line \ << ", function " << func \ <<"\n\t" << type << " value: " << expr << " = " << val << endl; \ - abort(); \ + fflush(stdout); \ + { fprintf(stdout,ss.str().c_str()); } \ + fflush(stdout); \ + abort(); \ } implement_invalid_value_error(double); From 8a1729372f64b9aa09eb38e715db3ff8d9e3a9b3 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 17:25:20 +0100 Subject: [PATCH 111/118] warning messages should issue warning and proceed, not exit --- include/errors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/errors.h b/include/errors.h index ace6b5f2..88281428 100644 --- a/include/errors.h +++ b/include/errors.h @@ -15,7 +15,7 @@ void eprintf_fileLine(FILE * fptr, const char *type, //#define eprintf(args...) \ // eprintf_fileLine("ERROR",__func__, __FILE__, __LINE__, ## args); #define warning_printf(args...) \ - eprintf_fileLine("WARNING",__func__, __FILE__, __LINE__, ## args); + fprintf_fileLine(stdout,"WARNING",__func__, __FILE__, __LINE__, ## args); #define declare_invalid_value_error(t1) \ void invalid_value_error_fileLine(const char* file, int line, const char* func, \ const char* type, const char* expr, t1 val); From 0996301a024d66b09e582f9fb0d60714f3db3bcf Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 17:28:24 +0100 Subject: [PATCH 112/118] implemented MPIdata::exit(code) to exit after calling MPI_Finalize() --- include/MPIdata.h | 4 +++- utility/MPIdata.cpp | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/MPIdata.h b/include/MPIdata.h index 36d29bcf..ef4e7a30 100644 --- a/include/MPIdata.h +++ b/include/MPIdata.h @@ -40,7 +40,9 @@ class MPIdata { /** initialize MPI environment */ static void init(int *, char ***); /** close MPI environment */ - void finalize_mpi(); + static void finalize_mpi(); + /** finalize and exit with error code */ + static void exit(int code); /** print MPI data structure 
*/ void Print(void); /** MPI status during the communication */ diff --git a/utility/MPIdata.cpp b/utility/MPIdata.cpp index d70a2b50..d9eb1895 100644 --- a/utility/MPIdata.cpp +++ b/utility/MPIdata.cpp @@ -41,6 +41,11 @@ void MPIdata::init(int *argc, char ***argv) { MPIdata_is_initialized = true; } +void MPIdata::exit(int code) { + finalize_mpi(); + ::exit(code); +} + void MPIdata::finalize_mpi() { MPI_Finalize(); } From 507e0a613e3301a142d4481ba8099b9777ba988a Mon Sep 17 00:00:00 2001 From: eajohnson Date: Fri, 7 Mar 2014 17:29:19 +0100 Subject: [PATCH 113/118] issue #64: MPI should be initialized immediately --- iPic3D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iPic3D.cpp b/iPic3D.cpp index 91129c7b..cff2f756 100644 --- a/iPic3D.cpp +++ b/iPic3D.cpp @@ -9,10 +9,10 @@ using namespace iPic3D; int main(int argc, char **argv) { + MPIdata::init(&argc, &argv); iPic3D::c_Solver KCode; bool b_err = false; - MPIdata::init(&argc, &argv); KCode.Init(argc, argv); for (int i = KCode.FirstCycle(); i < KCode.LastCycle(); i++) { From 333cae31df253e0250fe9fd0a73d5629f5939bd2 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 10 Mar 2014 15:39:32 +0100 Subject: [PATCH 114/118] issue #66: moving includes out of Collective.h --- include/Collective.h | 11 +---------- include/input_array.h | 2 +- inputoutput/Collective.cpp | 12 +++++++++++- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/Collective.h b/include/Collective.h index 382611b6..95e929dd 100644 --- a/include/Collective.h +++ b/include/Collective.h @@ -13,18 +13,9 @@ #endif -#include -//#include -//#include -#include -#include -#include -#include "ConfigFile.h" -#include "input_array.h" -#include "hdf5.h" //#include "CollectiveIO.h" +class ConfigFile; using namespace std; - using std::cout; using std::endl; using std::ofstream; diff --git a/include/input_array.h b/include/input_array.h index 6f8bdd94..be25ec5d 100644 --- a/include/input_array.h +++ b/include/input_array.h 
@@ -5,7 +5,7 @@ // Modified P. Henri 8 June 2011 // corrected by Markidis -#include +#include struct array_int { int a, b, c, d, e, f; diff --git a/inputoutput/Collective.cpp b/inputoutput/Collective.cpp index 5f0b7bbe..1675dffe 100644 --- a/inputoutput/Collective.cpp +++ b/inputoutput/Collective.cpp @@ -1,8 +1,18 @@ #include +#include +//#include +//#include +#include +#include +#include +#include "input_array.h" +#include "hdf5.h" #include "Collective.h" -#include "debug.h" +#include "ConfigFile.h" #include "limits.h" // for INT_MAX +#include "MPIdata.h" +#include "errors.h" #include "asserts.h" // for assert_ge /*! Read the input file from text file and put the data in a collective wrapper: if it's a restart read from input file basic sim data and load particles and EM field from restart file */ From 965aeaf0d9d073838a335ecb2865d8f485552ed0 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 10 Mar 2014 15:48:14 +0100 Subject: [PATCH 115/118] iss #65, iss #67: defined options for second-order accuracy --- include/Collective.h | 33 ++++++++++++++++++++- inputoutput/Collective.cpp | 59 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/include/Collective.h b/include/Collective.h index 95e929dd..b4f753cd 100644 --- a/include/Collective.h +++ b/include/Collective.h @@ -26,6 +26,21 @@ class Collective : public InterfaceFluid #endif { + private: + enum Enum{ + thedefault=0, + initial, + final, + // used by ImplSusceptMode + explPredict, + implPredict, + NUMBER_OF_ENUMS, // this must be last + INVALID_ENUM + }; + int read_enum_parameter(const char* option_name, char* default_value, + const ConfigFile& config); + public: + static const char* get_name_of_enum(int in); public: /*! 
constructor: initialize physical parameters with values */ Collective(int argc, char **argv); @@ -65,6 +80,10 @@ class Collective double getC()const{ return (c); } double getDt()const{ return (dt); } double getTh()const{ return (th); } + double getPushWithBatTime()const{ return PushWithBatTime; } + double getPushWithEatTime()const{ return PushWithEatTime; } + double getImplSusceptTime()const{ return ImplSusceptTime; } + int getImplSusceptMode()const{ return ImplSusceptMode; } double getSmooth()const{ return (Smooth); } int getNcycles()const{ return (ncycles); } int getNs()const{ return (ns); } @@ -145,8 +164,20 @@ class Collective double fourpi; /*! time step */ double dt; + // + // parameters used to support second order accuracy in time + // /*! decentering parameter */ - double th; + double th; // second-order for th=1/2, stable for 1/2 <= th <= 1 + /*! time of magnetic field used in particle push (0=initial, 1=final) */ + double PushWithBatTime; // 0=initial (default), 1=final + /*! time of electric field used in particle push */ + double PushWithEatTime; // 0=initial, 1=final (default) + /*! means of estimating time-advanced implicit susceptibility */ + int ImplSusceptMode; // "initial" (default), "explPredict", "implPredict" + /*! time of implicit susceptibility used in field advance */ + double ImplSusceptTime; // 0=initial (default), 1=final + // /*! Smoothing value */ double Smooth; /*! 
number of time cycles */ diff --git a/inputoutput/Collective.cpp b/inputoutput/Collective.cpp index 1675dffe..f5179c67 100644 --- a/inputoutput/Collective.cpp +++ b/inputoutput/Collective.cpp @@ -15,6 +15,49 @@ #include "errors.h" #include "asserts.h" // for assert_ge +// order must agree with Enum in Collective.h +static const char *enumNames[] = +{ + "default", + "initial", + "final", + // used by ImplSusceptMode + "explPredict", + "implPredict", + // marker for last enumerated symbol of this class + "NUMBER_OF_ENUMS", + "INVALID_ENUM" +}; + +int Collective::read_enum_parameter(const char* option_name, char* default_value, + const ConfigFile& config) +{ + string enum_name = config.read < string >(option_name,default_value); + // search the list (could use std::map) + // + for(int i=0;i("ns"); NpMaxNpRatio = config.read < double >("NpMaxNpRatio"); assert_ge(NpMaxNpRatio, 1.); + // mode parameters for second order in time + PushWithBatTime = config.read < double >("PushWithBatTime",0); + PushWithEatTime = config.read < double >("PushWithEatTime",1); + ImplSusceptTime = config.read < double >("ImplSusceptTime",0); + ImplSusceptMode = read_enum_parameter("ImplSusceptMode", "initial",config); + switch(ImplSusceptMode) + { + // values not yet supported: + case explPredict: + case implPredict: + default: + unsupported_value_error(ImplSusceptMode); + // supported values: + case initial: + ; + } // GEM Challenge B0x = config.read ("B0x"); B0y = config.read ("B0y"); From 776da31af6c8b88505ca3852e4c99dd9da1bce45 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 10 Mar 2014 17:25:24 +0100 Subject: [PATCH 116/118] fixed compile errors on icpc probably introduced two commits ago --- fields/EMfields3D.cpp | 1 + grids/Grid3DCU.cpp | 22 +++++++++++++++------- include/Collective.h | 6 +----- include/Grid3DCU.h | 3 --- include/input_array.h | 2 +- inputoutput/Collective.cpp | 2 +- 6 files changed, 19 insertions(+), 17 deletions(-) diff --git a/fields/EMfields3D.cpp 
b/fields/EMfields3D.cpp index 2bcc6fef..49747cc2 100644 --- a/fields/EMfields3D.cpp +++ b/fields/EMfields3D.cpp @@ -7,6 +7,7 @@ #include "Parameters.h" #include "ompdefs.h" #include "debug.h" +#include "string.h" // for memset /*! constructor */ // diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index f13d8aed..081a0855 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -87,13 +87,21 @@ Grid3DCU::~Grid3DCU() { /** print the local grid info */ void Grid3DCU::print(VirtualTopology3D * ptVCT) { - cout << endl; - cout << "Subgrid (" << ptVCT->getCoordinates(0) << "," << ptVCT->getCoordinates(1) << "," << ptVCT->getCoordinates(2) << ")" << endl; - cout << "Number of cell: -X=" << nxc - 2 << " -Y=" << nyc - 2 << " -Z=" << nzc - 2 << endl; - cout << "Xin = " << node_xcoord[1] << "; Xfin = " << node_xcoord[nxn - 2] << endl; - cout << "Yin = " << node_ycoord[1] << "; Yfin = " << node_ycoord[nyn - 2] << endl; - cout << "Zin = " << node_zcoord[1] << "; Zfin = " << node_zcoord[nzn - 2] << endl; - cout << endl; + printf("\nSubgrid (%d,%d,%d)\n", + ptVCT->getCoordinates(0), + ptVCT->getCoordinates(1), + ptVCT->getCoordinates(2)); + printf("Number of cells: X:%d, Y:%d, Z:%d\n", + nxc - 2, + nyc - 2, + nzc - 2); + printf( + "Xin = %d; Xfin = %d\n" + "Yin = %d; Yfin = %d\n" + "Zin = %d; Zfin = %d\n\n", + node_xcoord[1], node_xcoord[nxn - 2], + node_ycoord[1], node_ycoord[nyn - 2], + node_zcoord[1], node_zcoord[nzn - 2]); } /** calculate gradient on nodes, given a scalar field defined on central points */ diff --git a/include/Collective.h b/include/Collective.h index b4f753cd..f98c3dd0 100644 --- a/include/Collective.h +++ b/include/Collective.h @@ -11,15 +11,11 @@ #ifdef BATSRUS #include "InterfaceFluid.h" #endif - +#include //#include "CollectiveIO.h" class ConfigFile; using namespace std; -using std::cout; -using std::endl; -using std::ofstream; -using namespace std; class Collective #ifdef BATSRUS diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index 
92651196..497bbf0c 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -14,9 +14,6 @@ #include "VirtualTopology3D.h" #include "Alloc.h" -using std::cout; -using std::endl; - /** * Uniform cartesian local grid 3D * diff --git a/include/input_array.h b/include/input_array.h index be25ec5d..6f8bdd94 100644 --- a/include/input_array.h +++ b/include/input_array.h @@ -5,7 +5,7 @@ // Modified P. Henri 8 June 2011 // corrected by Markidis -#include +#include struct array_int { int a, b, c, d, e, f; diff --git a/inputoutput/Collective.cpp b/inputoutput/Collective.cpp index f5179c67..f60fbda2 100644 --- a/inputoutput/Collective.cpp +++ b/inputoutput/Collective.cpp @@ -3,7 +3,6 @@ #include //#include //#include -#include #include #include #include "input_array.h" @@ -14,6 +13,7 @@ #include "MPIdata.h" #include "errors.h" #include "asserts.h" // for assert_ge +#include "string.h" // order must agree with Enum in Collective.h static const char *enumNames[] = From e5fa332846c270c6d6f74412b4f109b3026f1c54 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Mon, 10 Mar 2014 17:41:23 +0100 Subject: [PATCH 117/118] "ipic exec" now calls mpiexec, just as "ipic run" calls mpirun --- scripts/ipic.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/ipic.py b/scripts/ipic.py index 3ae87d9e..6b2e2540 100755 --- a/scripts/ipic.py +++ b/scripts/ipic.py @@ -56,7 +56,7 @@ def issue_shell_command(command): print '+', command os.system(command) -def construct_run_command(args): +def construct_run_command(args,mpirun): # convert from deque to list for getopts args = list(args) @@ -68,7 +68,6 @@ def construct_run_command(args): output = 'data' inputfile = 'src/inputfiles/GEM.inp' hostname = '' - mpirun = 'mpirun' global system if system == 'xeon' or system == 'mic': if system == 'xeon': @@ -169,7 +168,11 @@ def construct_run_command(args): return command def ipic_run(args): - command = construct_run_command(args); + command = 
construct_run_command(args,'mpirun'); + issue_command(command) + +def ipic_exec(args): + command = construct_run_command(args,'mpiexec'); issue_command(command) def ipic_show_run(args): @@ -497,6 +500,8 @@ def ipic_command(argv1): ipic_cmake(args) elif command == "run": ipic_run(args) + elif command == "exec": + ipic_exec(args) elif command == "findcpph": ipic_findcpph(args) else: From 8c8d6577a0ebfe0e5dd3afc6bd8a622d7c71d5c6 Mon Sep 17 00:00:00 2001 From: eajohnson Date: Wed, 26 Mar 2014 11:50:41 +0100 Subject: [PATCH 118/118] removed unnecessary header includes in Grid3DCU.h (iss #66) --- grids/Grid3DCU.cpp | 4 ++++ include/Grid3DCU.h | 10 ++++------ include/PSKOutput.h | 2 +- include/arraysfwd.h | 4 ++-- include/ipicfwd.h | 6 ++++++ 5 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 include/ipicfwd.h diff --git a/grids/Grid3DCU.cpp b/grids/Grid3DCU.cpp index 081a0855..914ae4a9 100644 --- a/grids/Grid3DCU.cpp +++ b/grids/Grid3DCU.cpp @@ -2,6 +2,10 @@ #include #include "Grid3DCU.h" #include "MPIdata.h" +#include "Alloc.h" +#include "CollectiveIO.h" +#include "ComNodes3D.h" // for communicateCenterBC +#include "VirtualTopology3D.h" /*! constructor */ Grid3DCU::Grid3DCU(CollectiveIO * col, VirtualTopology3D * vct) { diff --git a/include/Grid3DCU.h b/include/Grid3DCU.h index 497bbf0c..2b688557 100644 --- a/include/Grid3DCU.h +++ b/include/Grid3DCU.h @@ -7,13 +7,11 @@ #ifndef GRID3DCU_H #define GRID3DCU_H -#include "Grid.h" -#include "CollectiveIO.h" -#include "ComInterpNodes3D.h" -#include "ComNodes3D.h" -#include "VirtualTopology3D.h" -#include "Alloc.h" +#include "arraysfwd.h" +#include "ipicfwd.h" +#include "math.h" // for floor +class VirtualTopology3D; /** * Uniform cartesian local grid 3D * diff --git a/include/PSKOutput.h b/include/PSKOutput.h index cdc5bbc5..2c461c64 100644 --- a/include/PSKOutput.h +++ b/include/PSKOutput.h @@ -14,10 +14,10 @@ developers: D. 
Burgess, June/July 2006 #include #include "errors.h" +#include "Grid.h" #include "PSKException.h" #include "Particles3Dcomm.h" #include "Field.h" -#include "Grid.h" #include "Collective.h" #include "VCtopology3D.h" #include "MPIdata.h" diff --git a/include/arraysfwd.h b/include/arraysfwd.h index 0afc2826..ca14498b 100644 --- a/include/arraysfwd.h +++ b/include/arraysfwd.h @@ -60,8 +60,8 @@ typedef iPic3D::array4 array4_pfloat; // This directive should be consistent with the directives in Alloc.h #if defined(FLAT_ARRAYS) || defined(CHECK_BOUNDS) typedef iPic3D::array_fetch1 arr1_double_fetch; -typedef iPic3D::array_get1 arr1_double_get; -typedef iPic3D::array_get1 arr1_pfloat_get; +typedef iPic3D::const_array_get1 arr1_double_get; +typedef iPic3D::const_array_get1 arr1_pfloat_get; typedef iPic3D::array_fetch2 arr2_double_fetch; typedef iPic3D::array_fetch3 arr3_double_fetch; #else diff --git a/include/ipicfwd.h b/include/ipicfwd.h new file mode 100644 index 00000000..d546e984 --- /dev/null +++ b/include/ipicfwd.h @@ -0,0 +1,6 @@ +#ifndef ipicfwd_h +#define ipicfwd_h +// forward declarations for iPic3D +class Collective; +typedef Collective CollectiveIO; +#endif