diff --git a/src/allgatherv/Makefile b/src/allgatherv/Makefile index 45b9c65..467d3cc 100644 --- a/src/allgatherv/Makefile +++ b/src/allgatherv/Makefile @@ -15,35 +15,41 @@ all: liballgatherv.so \ liballgatherv_comparebuffcontent.so \ liballgatherv_late_arrival.so -liballgatherv_displs.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_displs.o ../common/logger_displs.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_DISPLS=1 ../common/logger_for_displs.o ${COMMON_OBJECTS} ../common/timings.o ../common/logger_displs.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_displs.so +liballgatherv_displs.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_displs.o ../common/logger_displs.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ -I../common/ $(CFLAGS) -g -shared -Wall -fPIC -DENABLE_DISPLS=1 ../common/logger_for_displs.o ${COMMON_OBJECTS} ../common/timings.o ../common/logger_displs.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_displs.so $(LDFLAGS) -liballgatherv_counts.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/logger_for_counts.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_RAW_DATA=1 -DENABLE_COUNTS=1 ../common/logger_for_counts.o ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_counts.so - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_COUNTS=1 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_counts_notcompact.so +liballgatherv_counts.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/logger_for_counts.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ -I../common/ -g $(CFLAGS) -shared -Wall 
-fPIC -DENABLE_RAW_DATA=1 -DENABLE_COUNTS=1 ../common/logger_for_counts.o ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_counts.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_COMPACT_FORMAT=0 -DENABLE_COUNTS=1 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_counts_notcompact.so $(LDFLAGS) -liballgatherv_exec_timings.so: ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_exec_timings.so +liballgatherv_exec_timings.so: check-env ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_exec_timings.so $(LDFLAGS) -liballgatherv_late_arrival.so: ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_late_arrival.so +liballgatherv_late_arrival.so: check-env ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ 
-I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_late_arrival.so $(LDFLAGS) -liballgatherv_backtrace.so: ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_backtrace.so +liballgatherv_backtrace.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_backtrace.so $(LDFLAGS) -liballgatherv_location.so: ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_location.so +liballgatherv_location.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_location.so $(LDFLAGS) -liballgatherv_savebuffcontent.so: ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_SAVE_DATA_VALIDATION=1 ${COMMON_OBJECTS} 
../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_savebuffcontent.so -lssl -lcrypto +liballgatherv_savebuffcontent.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_SAVE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_savebuffcontent.so -lssl -lcrypto $(LDFLAGS) -liballgatherv_comparebuffcontent.so: ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC -DENABLE_COMPARE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_comparebuffcontent.so -lssl -lcrypto +liballgatherv_comparebuffcontent.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) -DENABLE_COMPARE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv_comparebuffcontent.so -lssl -lcrypto $(LDFLAGS) -liballgatherv.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h - mpicc -I../ -I../common/ -g -shared -Wall -fPIC ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_allgatherv.c -o liballgatherv.so -lssl -lcrypto +liballgatherv.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_allgatherv.c allgatherv_profiler.h + mpicc -I../ -I../common/ -g -shared -Wall -fPIC $(CFLAGS) ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o 
mpi_allgatherv.c -o liballgatherv.so -lssl -lcrypto $(LDFLAGS) + +check-env: +ifdef MPIX_HARMONIZE_PREFIX + CFLAGS+=-DHAVE_MPIX_HARMONIZE=1 -I$(MPIX_HARMONIZE_PREFIX)/include + LDFLAGS+=-L$(MPIX_HARMONIZE_PREFIX)/lib64 -Wl,-rpath $(MPIX_HARMONIZE_PREFIX)/lib64 -lmpix-harmonize -lmpits +endif check: all clean: - @rm -f *.so *.o \ No newline at end of file + @rm -f *.so *.o diff --git a/src/allgatherv/mpi_allgatherv.c b/src/allgatherv/mpi_allgatherv.c index 38bcf81..3915780 100644 --- a/src/allgatherv/mpi_allgatherv.c +++ b/src/allgatherv/mpi_allgatherv.c @@ -1,6 +1,6 @@ /************************************************************************* * Copyright (c) 2019-2010, Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. * * See LICENSE.txt for license information ************************************************************************/ @@ -53,6 +53,22 @@ double *late_arrival_timings = NULL; static logger_t *logger = NULL; +#if defined(HAVE_MPIX_HARMONIZE) +#include <mpix_harmonize.h> + +/* The frequency of re-harmonization, counting MPI_Allgatherv on MPI_COMM_WORLD */ +#define TRAMPOLINE_FREQUENCY 50 + +static int _trampoline_flag = 0; +static int _trampoline_iterations = 0; +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + +#if ENABLE_EXEC_TIMING +double timestamps_start[500]; +double timestamps_end[500]; +size_t num_timestamps = 0; +#endif // ENABLE_EXEC_TIMING + /* FORTRAN BINDINGS */ extern int mpi_fortran_in_place_; #define OMPI_IS_FORTRAN_IN_PLACE(addr) \ @@ -975,12 +991,26 @@ int MPI_Finalize() int MPI_Init_thread(int *argc, char ***argv, int required, int *provided) { - return _mpi_init_thread(argc, argv, required, provided); + int rc = _mpi_init_thread(argc, argv, required, provided); +#if defined(HAVE_MPIX_HARMONIZE) + if( MPI_SUCCESS == rc ) { + /* harmonize the clocks across all ranks in MPI_COMM_WORLD */ + rc = MPIX_Harmonize(MPI_COMM_WORLD, 
&_trampoline_flag); + } +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + return rc; } int MPI_Init(int *argc, char ***argv) { - return _mpi_init(argc, argv); + int rc = _mpi_init(argc, argv); +#if defined(HAVE_MPIX_HARMONIZE) + if( MPI_SUCCESS == rc ) { + /* harmonize the clocks across all ranks in MPI_COMM_WORLD */ + rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag); + } +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + return rc; } int mpi_init_thread_(MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr) @@ -1153,12 +1183,41 @@ static int _commit_data() { log_profiling_data(logger, allgathervCalls, allgathervCallStart, allgathervCallsLogged, counts_head, displs_head, op_timing_exec_head); - /* +/* #if ENABLE_TIMING - log_timing_data(logger, op_timing_exec_head); + //log_timing_data(logger, op_timing_exec_head); #endif // ENABLE_TIMING */ +#if ENABLE_EXEC_TIMING + /* Save start & end timestamps */ + if (num_timestamps > 0) + { + int ret, rc; + size_t i; + char *filename = NULL; + if (getenv(OUTPUT_DIR_ENVVAR)) + { + _asprintf(filename, rc, "%s/timestamps.rank%d.md", getenv(OUTPUT_DIR_ENVVAR), world_rank); + } + else + { + _asprintf(filename, rc, "timestamps.rank%d.md", world_rank); + } + assert(rc > 0); + + FILE *f = fopen(filename, "w"); + assert(f); + + for (i = 0; i < num_timestamps; i++) + { + fprintf(f, "%lf %lf\n", timestamps_start[i], timestamps_end[i]); + } + fclose(f); + num_timestamps = 0; + } +#endif // ENABLE_EXEC_TIMING + #if ENABLE_PATTERN_DETECTION && !TRACK_PATTERNS_ON_CALL_BASIS save_patterns(world_rank); #endif // ENABLE_PATTERN_DETECTION && !TRACK_PATTERNS_ON_CALL_BASIS @@ -1356,6 +1415,10 @@ int _mpi_allgatherv(const void *sendbuf, const int sendcount, MPI_Datatype sendt #if ENABLE_EXEC_TIMING double t_start = MPI_Wtime(); + if (num_timestamps < 500) + { + timestamps_start[num_timestamps] = t_start; + } #endif // ENABLE_EXEC_TIMING ret = PMPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm); @@ -1378,6 
+1441,11 @@ int _mpi_allgatherv(const void *sendbuf, const int sendcount, MPI_Datatype sendt #if ENABLE_EXEC_TIMING double t_end = MPI_Wtime(); + if (num_timestamps < 500) + { + timestamps_end[num_timestamps] = t_end; + num_timestamps++; + } double t_op = t_end - t_start; #endif // ENABLE_EXEC_TIMING @@ -1598,6 +1666,20 @@ int MPI_Allgatherv(const void *sendbuf, const int sendcount, MPI_Datatype sendty void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) { +#if defined(HAVE_MPIX_HARMONIZE) + /* From time to time we need to resynchronize the clocks, but we can only do it on MPI_Allgatherv on + * MPI_COMM_WORLD. + */ + if( MPI_COMM_WORLD == comm ) { + _trampoline_iterations++; + if( 0 == (_trampoline_iterations % TRAMPOLINE_FREQUENCY) ) { + int rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag); + if( MPI_SUCCESS != rc ) { + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + } +#endif /* defined(HAVE_MPIX_HARMONIZE) */ return _mpi_allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm); } @@ -1632,6 +1714,7 @@ void mpi_allgatherv_(void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, // if the app never calls MPI_Finalize(). 
__attribute__((destructor)) void calledLast() { - _commit_data(); - _finalize_profiling(); + if( NULL == logger ) return; /* nothing more to do, already done */ + _commit_data(); + _finalize_profiling(); } diff --git a/src/alltoall/Makefile b/src/alltoall/Makefile index 6875351..a278584 100644 --- a/src/alltoall/Makefile +++ b/src/alltoall/Makefile @@ -11,34 +11,40 @@ include ../makefile_common.mk all: liballtoall.so liballtoall_location.so liballtoall_counts.so liballtoall_late_arrival.so liballtoall_exec_timings.so liballtoall_backtrace.so -liballtoall_counts.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts.so - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts_unequal.so - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts_compact.so - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts_unequal_compact.so - -liballtoall_exec_timings.so: ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h - mpicc 
-I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_exec_timings.so - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_exec_timings_counts_unequal.so - -liballtoall_late_arrival.so: ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_late_arrival.so - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_late_arrival_counts_unequal.so - -liballtoall_backtrace.so: ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_backtrace.so - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_backtrace_counts_unequal.so - -liballtoall_location.so: ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC 
-DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_location.so - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_location_counts_unequal.so - -liballtoall.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoall.c -o liballtoall.so - mpicc -I../ -I../common/ -g -shared -fPIC -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts_unequal.so +liballtoall_counts.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts_unequal.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts_compact.so $(LDFLAGS) + mpicc -I../ -I../common/ -g 
-shared -fPIC $(CFLAGS) -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts_unequal_compact.so $(LDFLAGS) + +liballtoall_exec_timings.so: check-env ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_exec_timings.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_EXEC_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_exec_timings_counts_unequal.so $(LDFLAGS) + +liballtoall_late_arrival.so: check-env ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_late_arrival.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_LATE_ARRIVAL_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_late_arrival_counts_unequal.so $(LDFLAGS) + +liballtoall_backtrace.so: check-env ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_BACKTRACE=1 
${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_backtrace.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_BACKTRACE=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_backtrace_counts_unequal.so $(LDFLAGS) + +liballtoall_location.so: check-env ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_location.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_LOCATION_TRACKING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_location_counts_unequal.so $(LDFLAGS) + +liballtoall.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoall.c alltoall_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoall.c -o liballtoall.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoall.c -o liballtoall_counts_unequal.so $(LDFLAGS) + +check-env: +ifdef MPIX_HARMONIZE_PREFIX + CFLAGS+=-DHAVE_MPIX_HARMONIZE=1 -I$(MPIX_HARMONIZE_PREFIX)/include + LDFLAGS+=-L$(MPIX_HARMONIZE_PREFIX)/lib64 -Wl,-rpath $(MPIX_HARMONIZE_PREFIX)/lib64 -lmpix-harmonize -lmpits +endif check: all clean: @rm -f *.so *.o - @rm -f grouping_test compress_array_test patterns_detection_test \ No newline at end of file + 
@rm -f grouping_test compress_array_test patterns_detection_test diff --git a/src/alltoall/mpi_alltoall.c b/src/alltoall/mpi_alltoall.c index 305659a..7915e8e 100755 --- a/src/alltoall/mpi_alltoall.c +++ b/src/alltoall/mpi_alltoall.c @@ -1,6 +1,6 @@ /************************************************************************* * Copyright (c) 2019-2010, Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. * * See LICENSE.txt for license information ************************************************************************/ @@ -66,6 +66,16 @@ extern int mpi_fortran_bottom_; static int _finalize_profiling(); static int _commit_data(); +#if defined(HAVE_MPIX_HARMONIZE) +#include <mpix_harmonize.h> + +/* The frequency of re-harmonization, counting MPI_Alltoall on MPI_COMM_WORLD */ +#define TRAMPOLINE_FREQUENCY 50 + +static int _trampoline_flag = 0; +static int _trampoline_iterations = 0; +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + void print_trace(FILE *f) { assert(f); @@ -836,7 +846,14 @@ int MPI_Finalize() int MPI_Init(int *argc, char ***argv) { - return _mpi_init(argc, argv); + int rc = _mpi_init(argc, argv); +#if defined(HAVE_MPIX_HARMONIZE) + if( MPI_SUCCESS == rc ) { + /* harmonize the clocks across all ranks in MPI_COMM_WORLD */ + rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag); + } +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + return rc; } int mpi_init_(MPI_Fint *ierr) @@ -1259,7 +1276,21 @@ int _mpi_alltoall(const void *sendbuf, const int sendcount, MPI_Datatype sendtyp int MPI_Alltoall(const void *sendbuf, const int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { - return _mpi_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); +#if defined(HAVE_MPIX_HARMONIZE) + /* From time to time we need to resynchronize the clocks, but we can only do it on MPI_Alltoall 
on + * MPI_COMM_WORLD. + */ + if( MPI_COMM_WORLD == comm ) { + _trampoline_iterations++; + if( 0 == (_trampoline_iterations % TRAMPOLINE_FREQUENCY) ) { + int rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag); + if( MPI_SUCCESS != rc ) { + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + } +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + return _mpi_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm); } void mpi_alltoall_(void *sendbuf, MPI_Fint sendcount, MPI_Fint *sendtype, @@ -1293,6 +1324,7 @@ void mpi_alltoall_(void *sendbuf, MPI_Fint sendcount, MPI_Fint *sendtype, void __attribute__((destructor)) calledLast(); void calledLast() { + if( NULL == logger ) return; /* nothing more to do, already done */ _commit_data(); _finalize_profiling(); -} \ No newline at end of file +} diff --git a/src/alltoallv/Makefile b/src/alltoallv/Makefile index c3fcaa6..7f184ed 100644 --- a/src/alltoallv/Makefile +++ b/src/alltoallv/Makefile @@ -14,32 +14,38 @@ all: liballtoallv.so \ liballtoallv_comparebuffcontent.so \ liballtoallv_late_arrival.so -liballtoallv_counts.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/logger_for_counts.o mpi_alltoallv.c alltoallv_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_RAW_DATA=1 -DENABLE_COUNTS=1 ../common/logger_for_counts.o ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_counts.so - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 -DENABLE_COUNTS=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_counts_notcompact.so +liballtoallv_counts.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/logger_for_counts.o mpi_alltoallv.c alltoallv_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_RAW_DATA=1 -DENABLE_COUNTS=1 
../common/logger_for_counts.o ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_counts.so $(LDFLAGS) + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 -DENABLE_COUNTS=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_for_counts.o ../common/logger_counts.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_counts_notcompact.so $(LDFLAGS) -liballtoallv_exec_timings.so: ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_exec_timings.so +liballtoallv_exec_timings.so: check-env ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_exec_timings.so $(LDFLAGS) -liballtoallv_late_arrival.so: ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_late_arrival.so +liballtoallv_late_arrival.so: check-env ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} 
../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_late_arrival.so $(LDFLAGS) -liballtoallv_backtrace.so: ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_backtrace.so +liballtoallv_backtrace.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_backtrace.so $(LDFLAGS) -liballtoallv_location.so: ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_location.so +liballtoallv_location.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_location.so $(LDFLAGS) -liballtoallv_savebuffcontent.so: ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_SAVE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_savebuffcontent.so -lssl -lcrypto +liballtoallv_savebuffcontent.so: 
check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_SAVE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_savebuffcontent.so -lssl -lcrypto $(LDFLAGS) -liballtoallv_comparebuffcontent.so: ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPARE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_comparebuffcontent.so -lssl -lcrypto +liballtoallv_comparebuffcontent.so: check-env ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) -DENABLE_COMPARE_DATA_VALIDATION=1 ${COMMON_OBJECTS} ../common/logger.o ../common/timings.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv_comparebuffcontent.so -lssl -lcrypto $(LDFLAGS) -liballtoallv.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h - mpicc -I../ -I../common/ -g -shared -fPIC ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv.so -lssl -lcrypto +liballtoallv.so: check-env ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoallv.c alltoallv_profiler.h + mpicc -I../ -I../common/ -g -shared -fPIC $(CFLAGS) ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o ../common/buff_content.o mpi_alltoallv.c -o liballtoallv.so -lssl -lcrypto $(LDFLAGS) + +check-env: +ifdef MPIX_HARMONIZE_PREFIX + CFLAGS+=-DHAVE_MPIX_HARMONIZE=1 -I$(MPIX_HARMONIZE_PREFIX)/include + LDFLAGS+=-L$(MPIX_HARMONIZE_PREFIX)/lib64 
-Wl,-rpath $(MPIX_HARMONIZE_PREFIX)/lib64 -lmpix-harmonize -lmpits +endif check: all clean: - @rm -f *.so *.o \ No newline at end of file + @rm -f *.so *.o diff --git a/src/alltoallv/mpi_alltoallv.c b/src/alltoallv/mpi_alltoallv.c index 9eccfc2..d2686a0 100755 --- a/src/alltoallv/mpi_alltoallv.c +++ b/src/alltoallv/mpi_alltoallv.c @@ -1,6 +1,6 @@ /************************************************************************* * Copyright (c) 2019-2010, Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. * * See LICENSE.txt for license information ************************************************************************/ @@ -64,6 +64,16 @@ extern int mpi_fortran_bottom_; #define OMPI_F2C_IN_PLACE(addr) (OMPI_IS_FORTRAN_IN_PLACE(addr) ? MPI_IN_PLACE : (addr)) #define OMPI_F2C_BOTTOM(addr) (OMPI_IS_FORTRAN_BOTTOM(addr) ? MPI_BOTTOM : (addr)) +#if defined(HAVE_MPIX_HARMONIZE) +#include + +/* The frequency of re-harmonization, counting MPI_Alltoallv on MPI_COMM_WORLD */ +#define TRAMPOLINE_FREQUENCY 50 + +static int _trampoline_flag = 0; +static int _trampoline_iterations = 0; +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + static int _finalize_profiling(); static int _commit_data(); @@ -893,12 +903,26 @@ int MPI_Finalize() int MPI_Init_thread(int *argc, char ***argv, int required, int *provided) { - return _mpi_init_thread(argc, argv, required, provided); + int rc = _mpi_init_thread(argc, argv, required, provided); +#if defined(HAVE_MPIX_HARMONIZE) + if( MPI_SUCCESS == rc ) { + /* harmonize the clocks across all ranks in MPI_COMM_WORLD */ + rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag); + } +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + return rc; } int MPI_Init(int *argc, char ***argv) { - return _mpi_init(argc, argv); + int rc = _mpi_init(argc, argv); +#if defined(HAVE_MPIX_HARMONIZE) + if( MPI_SUCCESS == rc ) { + /* harmonize the 
clocks across all ranks in MPI_COMM_WORLD */ + rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag); + } +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + return rc; } int mpi_init_thread_(MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr) @@ -1438,7 +1462,21 @@ int MPI_Alltoallv(const void *sendbuf, const int *sendcounts, const int *sdispls MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) { - return _mpi_alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm); +#if defined(HAVE_MPIX_HARMONIZE) + /* From time to time we need to resynchronize the clocks, but we can only do it on MPI_Alltoallv on + * MPI_COMM_WORLD. + */ + if( MPI_COMM_WORLD == comm ) { + _trampoline_iterations++; + if( 0 == (_trampoline_iterations % TRAMPOLINE_FREQUENCY) ) { + int rc = MPIX_Harmonize(MPI_COMM_WORLD, &_trampoline_flag); + if( MPI_SUCCESS != rc ) { + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + } +#endif /* defined(HAVE_MPIX_HARMONIZE) */ + return _mpi_alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm); } void mpi_alltoallv_(void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sdispls, MPI_Fint *sendtype, @@ -1473,6 +1511,7 @@ void mpi_alltoallv_(void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sdispls, MPI_F // if the app never calls MPI_Finalize(). __attribute__((destructor)) void calledLast() { + if( NULL == logger ) return; /* nothing more to do, already done */ _commit_data(); _finalize_profiling(); }