From c2689883c3eb32278e15dfecdaa3328a79be1049 Mon Sep 17 00:00:00 2001
From: Geoffroy Vallee <geoffroy.vallee@gmail.com>
Date: Tue, 22 Jun 2021 15:51:33 -0400
Subject: [PATCH] ISC2021 CC USTC

---
 README.md                                     |   6 +-
 build-scripts/build-hpcac.sh                  |   4 +-
 examples/Makefile                             |  20 +-
 examples/alltoall.c                           | 429 +++++++++++++++++
 src/alltoall/Makefile                         |  50 +-
 tools/Makefile                                |   5 +-
 tools/cmd/png/png.go                          | 245 ++++++++++
 tools/cmd/validate/validate.go                | 449 ++++++------------
 tools/internal/pkg/plot/plot.go               | 101 ++--
 .../pkg/webui/templates/heatmapDetails.html   |   8 +
 .../pkg/webui/templates/heatmapsLayout.html   | 160 +++++++
 tools/internal/pkg/webui/templates/index.html |   9 +-
 tools/internal/pkg/webui/webui.go             |  68 ++-
 13 files changed, 1134 insertions(+), 420 deletions(-)
 create mode 100644 examples/alltoall.c
 create mode 100644 tools/cmd/png/png.go
 create mode 100644 tools/internal/pkg/webui/templates/heatmapDetails.html
 create mode 100644 tools/internal/pkg/webui/templates/heatmapsLayout.html

diff --git a/README.md b/README.md
index 62b1ec45..f2c6da90 100644
--- a/README.md
+++ b/README.md
@@ -86,14 +86,14 @@ Like any PMPI option, users need to use `LD_PRELOAD` while executing their appli
 On a platform where `mpirun` is directly used, the command to start the application
 looks like:
 ```
-LD_PRELOAD=$HOME<path_to_repo>/src/alltoallv/liballtoallv.so mpirun --oversubscribe -np 3 app.exe 
+LD_PRELOAD=$HOME<path_to_repo>/alltoallv/liballtoallv.so mpirun --oversubscribe -np 3 app.exe 
 ```
 
 On a platform where a job manager is used, such as Slurm, users need to update the
 batch script used to submit an application run. For instance, with Open MPI and Slurm,
 it would look like:
 ```
-mpirun -np $NPROC -x LD_PRELOAD=<path_to_repo>/src/alltoallv/liballtoallv_counts.so app.exe
+mpirun -np $NPROC -x LD_PRELOAD=<path_to_repo>/alltoallv/liballtoallv_counts.so app.exe
 ```
 
 When using a job scheduler, users are required to correctly set the LD_PRELOAD details
@@ -331,4 +331,4 @@ consist of a graph providing:
 - the execution time per rank,
 - the arrival time per rank,
 - the bandwidth per rank.
-Under the graph appears the raw send and receive counters.
+Under the graph appears the raw send and receive counters.
\ No newline at end of file
diff --git a/build-scripts/build-hpcac.sh b/build-scripts/build-hpcac.sh
index 581347d8..d0615f27 100644
--- a/build-scripts/build-hpcac.sh
+++ b/build-scripts/build-hpcac.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-cd /global/home/users/cyrusl/placement/expt0070/alltoall_profiling
+cd /global/home/users/cyrusl/placement/expt0066/alltoall_profiling
 
 module purge
 spack unload --all
@@ -8,7 +8,7 @@ spack unload --all
 HNAME=$(hostname)
 
 #if [[ ${HNAME:0:4} == "thor" ]]; then
-    module load gcc/8.3.1 hpcx/2.7.0 gnuplot/5.2.8
+    module load gcc/8.3.1 hpcx/2.7.0
 #else
 #    module load gcc/4.8.5 hpcx/2.7.0  # these were used for compiling on Login node for use on Jupiter before change to Centos 8
 #fi
diff --git a/examples/Makefile b/examples/Makefile
index f2b54ba1..8a492f0a 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -4,23 +4,11 @@
 # See LICENSE.txt for license information
 #
 
-all: alltoallv_c alltoallv_dt_c alltoallv_multicomms_c alltoallv_f alltoallv_bigcounts_c alltoall_demo alltoall_simple_c alltoall_bigcounts_c alltoall_multicomms_c alltoall_dt_c
+all: alltoallv_c alltoallv_dt_c alltoallv_multicomms_c alltoallv_f alltoallv_bigcounts_c alltoall
 
-alltoall_demo: alltoall_demo.c
-	mpicc -g alltoall_demo.c -o alltoall_demo
-
-alltoall_simple_c: alltoall_simple_c.c
-	mpicc -g alltoall_simple_c.c -o alltoall_simple_c
+alltoall: alltoall.c
+	mpicc -g alltoall.c -o alltoall
   
-alltoall_bigcounts_c: alltoall_bigcounts_c.c
-	mpicc -g alltoall_bigcounts_c.c -o alltoall_bigcounts_c
-
-alltoall_multicomms_c: alltoall_multicomms_c.c
-	mpicc -g alltoall_multicomms_c.c -o alltoall_multicomms_c
-
-alltoall_dt_c: alltoall_dt_c.c
-	mpicc -g alltoall_dt_c.c -o alltoall_dt_c
-
 alltoallv_c: alltoallv.c
 	mpicc -g alltoallv.c -o alltoallv_c
 
@@ -37,4 +25,4 @@ alltoallv_bigcounts_c: alltoallv_bigcounts.c
 	mpicc -g alltoallv_bigcounts.c -o alltoallv_bigcounts_c
 
 clean:
-	@rm -f alltoallv_c alltoallv_dt_c alltoallv_f alltoallv_multicomms_c alltoallv_bigcounts_c alltoall_demo alltoall_simple_c
+	@rm -f alltoallv_c alltoallv_dt_c alltoallv_f alltoallv_multicomms_c alltoallv_bigcounts_c alltoall
diff --git a/examples/alltoall.c b/examples/alltoall.c
new file mode 100644
index 00000000..fbf0b71c
--- /dev/null
+++ b/examples/alltoall.c
@@ -0,0 +1,429 @@
+/* A test of MPI_Alltoall providing count patterns to be recorded by the samples
+ */ 
+
+#include <mpi.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "../src/alltoall/alltoall_profiler.h"
+#define DEBUG_FLUSH 1
+
+
+// to be compiled with std=c99
+
+/* man page for MPI_Alltoall says
+ * "The amount of data sent must be equal to the amount of data received, pairwise, between every pair of processes."
+ * hence the first constant below is constant - can use different values of it in different runs */ 
+#define RANK_TO_RANK_BLOCKSIZE 16 
+// Constants for placement of info encoded in send and recv buffers
+#define BYTE_1_MULTIPLIER 256
+#define BYTE_2_MULTIPLIER 65536
+
+#define PARAM_SETS_COUNT 10
+#define RANK_SETS_COUNT 3
+
+/* MPI and C types used in this test are:  
+    MPI_UINT8_T         uint8_t    index=0
+    MPI_UINT16_T        uint16_t   index=1
+    MPI_UINT32_T        uint32_t   index=2
+    MPI_UINT64_T        uint64_t   index=3
+*/
+MPI_Datatype MPI_Datatypes_used[4] = {MPI_UINT8_T,MPI_UINT16_T,MPI_UINT32_T, MPI_UINT64_T};
+const char type_strings[4][10] = {"uint8_t", "uint16_t", "uint32_t", "uint64_t"} ;  //for printing out the parameter sets 
+
+
+typedef struct rank_set{        // one group of world ranks plus the communicator built for it
+    int count;                  // number of valid entries at the start of ranks[]
+    int ranks[10];              // ranks passed to MPI_Group_incl on the MPI_COMM_WORLD group
+    MPI_Comm communicator;      // filled in by create_communicators() via MPI_Comm_create_group
+} rank_set_t;
+
+
+typedef struct alltoall_test_node_params {   // arguments for one MPI_Alltoall call
+    int send_type_idx;      // index into MPI_Datatypes_used for the send datatype
+    int recv_type_idx;      // index into MPI_Datatypes_used for the receive datatype
+    int sendcount;          // sendcount argument of MPI_Alltoall
+    int recvcount;          // recvcount argument of MPI_Alltoall
+    rank_set_t* rank_set;   // rank set whose communicator the call runs on
+} alltoall_test_node_params_t;
+
+
+// Returns a malloc'd array of RANK_SETS_COUNT rank sets:
+// [0] = ranks 0-7, [1] = ranks 0-3, [2] = ranks 4-7.
+rank_set_t* create_rank_sets(){
+    rank_set_t* sets = malloc(sizeof(*sets) * RANK_SETS_COUNT);
+    // members left out of an initialiser (unused ranks[] slots, communicator) are zero-initialised
+    sets[0] = (rank_set_t) { .count = 8, .ranks = {0, 1, 2, 3, 4, 5, 6, 7} };
+    sets[1] = (rank_set_t) { .count = 4, .ranks = {0, 1, 2, 3} };
+    sets[2] = (rank_set_t) { .count = 4, .ranks = {4, 5, 6, 7} };
+    return sets;
+}
+
+// True when `rank` appears among the first rank_set->count entries of rank_set->ranks.
+bool is_rank_in_rankset(int rank, rank_set_t* rank_set){
+    int idx = 0;
+    while (idx < rank_set->count && rank_set->ranks[idx] != rank) idx++;
+    return idx < rank_set->count;
+}
+
+// Creates one communicator per rank set and stores it in that set's `communicator` member.
+//   world_size      - size of MPI_COMM_WORLD; only used in the debug trace
+//   rank_sets       - array of rank sets to build communicators for
+//   rank_sets_count - number of entries in rank_sets
+void create_communicators(int world_size, rank_set_t* rank_sets, int rank_sets_count){
+    DEBUG_ALLTOALL_PROFILING("params for create_communicators: worldsize = %i, ranks_sets_count = %i\n", world_size, rank_sets_count);
+    for (int k=0; k<rank_sets[0].count; k++) DEBUG_ALLTOALL_PROFILING("%i ", rank_sets[0].ranks[k]);
+    DEBUG_ALLTOALL_PROFILING("\n");
+
+    MPI_Group world_group;
+    DEBUG_ALLTOALL_PROFILING("calling MPI_Comm_group ...\n", NULL);
+    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
+
+    int group_size;
+    MPI_Group_size(world_group , &group_size);
+    DEBUG_ALLTOALL_PROFILING("World group size = %i\n", group_size);
+
+    for (int rank_set_idx=0; rank_set_idx< rank_sets_count; rank_set_idx++){
+        DEBUG_ALLTOALL_PROFILING("IN LOOP\n");
+        rank_set_t* rank_set = &rank_sets[rank_set_idx];
+        for (int k=0; k<rank_set->count; k++) DEBUG_ALLTOALL_PROFILING("* %i ", rank_set->ranks[k]);
+        DEBUG_ALLTOALL_PROFILING("\n");
+        DEBUG_ALLTOALL_PROFILING("calling MPI_Group_incl rank_set_idx=%i ...\n", rank_set_idx);
+        MPI_Group group;
+        DEBUG_ALLTOALL_PROFILING("rankSetcount = %i\n", rank_set->count);
+        MPI_Group_incl(world_group, rank_set->count, rank_set->ranks, &group);
+        DEBUG_ALLTOALL_PROFILING("calling MPI_Comm_create_group rank_set_idx=%i ...\n", rank_set_idx);
+        MPI_Comm_create_group(MPI_COMM_WORLD , group , 0, &rank_set->communicator);
+        MPI_Group_free(&group);  // the group handle is no longer needed once the communicator exists
+        DEBUG_ALLTOALL_PROFILING("Group created ...\n", NULL);
+    }
+    MPI_Group_free(&world_group);
+}
+
+
+// Builds the malloc'd table of PARAM_SETS_COUNT parameter sets used when every rank passes the same arguments.
+alltoall_test_node_params_t* alltoall_test_all_node_params_sets(rank_set_t* rank_sets){
+    DEBUG_ALLTOALL_PROFILING("creating param sets ...\n", NULL);
+    alltoall_test_node_params_t* sets = malloc(sizeof(*sets) * PARAM_SETS_COUNT);
+    sets[0] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[0], .send_type_idx = 0, .sendcount =  8, .recv_type_idx = 0, .recvcount =  8};
+    sets[1] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[0], .send_type_idx = 0, .sendcount = 16, .recv_type_idx = 0, .recvcount = 16};
+    sets[2] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[1], .send_type_idx = 0, .sendcount = 32, .recv_type_idx = 0, .recvcount = 32};
+    sets[3] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[0], .send_type_idx = 0, .sendcount = 64, .recv_type_idx = 0, .recvcount = 64};
+    sets[4] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[0], .send_type_idx = 1, .sendcount =  8, .recv_type_idx = 1, .recvcount =  8};
+    sets[5] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[0], .send_type_idx = 2, .sendcount = 16, .recv_type_idx = 2, .recvcount = 16};
+    sets[6] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[1], .send_type_idx = 0, .sendcount =  8, .recv_type_idx = 0, .recvcount =  8};
+    sets[7] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[1], .send_type_idx = 0, .sendcount = 16, .recv_type_idx = 0, .recvcount = 16};
+    sets[8] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[0], .send_type_idx = 1, .sendcount =  8, .recv_type_idx = 0, .recvcount = 16};
+    sets[9] = (alltoall_test_node_params_t) {.rank_set = &rank_sets[0], .send_type_idx = 0, .sendcount = 16, .recv_type_idx = 1, .recvcount =  8};
+    DEBUG_ALLTOALL_PROFILING("param sets created\n", NULL);
+    return sets;
+}
+
+// Builds the per-rank parameter sets for test 2: only entries 0-3 are populated, the rest stay zeroed by calloc.
+alltoall_test_node_params_t* alltoall_test_individual_node_params_sets(rank_set_t* rank_sets){
+    DEBUG_ALLTOALL_PROFILING("creating param sets ...\n", NULL);
+    alltoall_test_node_params_t* params_sets = calloc(PARAM_SETS_COUNT, sizeof(*params_sets));
+    params_sets[0] = (alltoall_test_node_params_t) {.send_type_idx = 0, .recv_type_idx = 0, .sendcount = 16, .recvcount = 16, .rank_set = &rank_sets[0]};
+    params_sets[1] = (alltoall_test_node_params_t) {.send_type_idx = 0, .recv_type_idx = 1, .sendcount = 16, .recvcount =  8, .rank_set = &rank_sets[0]};
+    params_sets[2] = (alltoall_test_node_params_t) {.send_type_idx = 1, .recv_type_idx = 0, .sendcount =  8, .recvcount = 16, .rank_set = &rank_sets[0]};
+    params_sets[3] = (alltoall_test_node_params_t) {.send_type_idx = 1, .recv_type_idx = 1, .sendcount =  8, .recvcount =  8, .rank_set = &rank_sets[0]};
+    DEBUG_ALLTOALL_PROFILING("param sets created\n", NULL);
+    return params_sets;
+}
+
+
+// Allocates and fills the send buffer for one MPI_Alltoall call: sendcount * rank_set->count
+// elements of the type selected by send_type_idx (0=uint8_t, 1=uint16_t, 2=uint32_t, 3=uint64_t).
+// Element i is set to i / sendcount, i.e. the index of the block it belongs to.
+// Returns the malloc'd buffer (the caller frees it), or NULL when send_type_idx is not 0-3
+// or the allocation fails.
+void* create_sendbuf(alltoall_test_node_params_t* node_params){
+    const int sendcount = node_params->sendcount;
+    const int total = sendcount * node_params->rank_set->count;
+    void* a = NULL;
+    DEBUG_ALLTOALL_PROFILING("in create_Sendbuf\n", NULL);
+    switch (node_params->send_type_idx){
+        case 0:
+            DEBUG_ALLTOALL_PROFILING("in case 0: buffersize = %i\n", total);
+            a = malloc(sizeof(uint8_t) * total);
+            if (a == NULL) break;
+            for (int i=0; i < total; i++) ((uint8_t*) a)[i] = i / sendcount;
+#if DEBUG == 1
+            for (int j=0; j < total && j < 64; j++) DEBUG_ALLTOALL_PROFILING("~~ %i ", ((uint8_t*) a)[j]);
+#endif
+            break;
+        case 1:
+            a = malloc(sizeof(uint16_t) * total);
+            if (a == NULL) break;
+            for (int i=0; i < total; i++) ((uint16_t*) a)[i] = i / sendcount;
+            break;
+        case 2:
+            a = malloc(sizeof(uint32_t) * total);
+            if (a == NULL) break;
+            for (int i=0; i < total; i++) ((uint32_t*) a)[i] = i / sendcount;
+            break;
+        case 3:
+            a = malloc(sizeof(uint64_t) * total);
+            if (a == NULL) break;
+            for (int i=0; i < total; i++) ((uint64_t*) a)[i] = i / sendcount;
+            break;
+        default: DEBUG_ALLTOALL_PROFILING("fell out of case!!!!\n", NULL);
+    }
+    return a;
+}
+
+
+// Allocates the zero-filled receive buffer for one MPI_Alltoall call: recvcount * rank_set->count
+// elements of the type selected by recv_type_idx (0=uint8_t, 1=uint16_t, 2=uint32_t, 3=uint64_t).
+// Returns the buffer (the caller frees it), or NULL when recv_type_idx is not 0-3 or the
+// allocation fails.
+void* create_recvbuf(alltoall_test_node_params_t* node_params){
+    size_t elem_size;
+    switch (node_params->recv_type_idx){
+        case 0:
+            elem_size = sizeof(uint8_t);
+            break;
+        case 1:
+            elem_size = sizeof(uint16_t);
+            break;
+        case 2:
+            elem_size = sizeof(uint32_t);
+            break;
+        case 3:
+            elem_size = sizeof(uint64_t);
+            break;
+        default:
+            return NULL;
+    }
+    // calloc zero-fills the buffer, so no per-element initialisation loop is needed
+    return calloc((size_t) node_params->recvcount * node_params->rank_set->count, elem_size);
+}
+
+
+
+// Prints `nblocks` consecutive blocks of `count` elements from `buf` in hex, one line per block.
+// `label_fmt` is a printf format taking the block index; `type_idx` selects the element type
+// (0=uint8_t, 1=uint16_t, 2=uint32_t, 3=uint64_t); any other value prints the labels only.
+static void print_blocks(const char* label_fmt, void* buf, int type_idx, int count, int nblocks){
+    for (int block=0; block<nblocks; block++){
+        printf(label_fmt, block);
+        for (int elem=0; elem<count; elem++){
+            // position of the element within the whole buffer
+            const int pos = block * count + elem;
+            switch (type_idx){
+                case 0:
+                    printf(" %02x ", ((uint8_t*)buf)[pos]);
+                    break;
+                case 1:
+                    printf(" %04x ", ((uint16_t*)buf)[pos]);
+                    break;
+                case 2:
+                    printf(" %08x ", ((uint32_t*)buf)[pos]);
+                    break;
+                case 3:
+                    printf(" %016lx ", ((uint64_t*)buf)[pos]);
+                    break;
+            }
+        }
+        printf("\n");
+        fflush(stdout);
+    }
+}
+
+// Prints this rank's send and receive buffers when its turn comes.
+// There is one turn per world rank; every turn starts with a barrier on the rank set's
+// communicator and ends with a one second sleep so that only one rank prints at once.
+//   my_rank    - rank compared against the turn index to decide whether to print
+//   world_size - number of turns
+//   param_set  - supplies the element types, the counts and the rank set
+//   sendbuf    - rank_set->count blocks of sendcount elements
+//   recvbuf    - rank_set->count blocks of recvcount elements
+void print_buffers(int my_rank, int world_size, alltoall_test_node_params_t* param_set, void* sendbuf, void* recvbuf){
+    rank_set_t* set = param_set->rank_set;
+    for (int turn=0; turn<world_size; turn++){
+        MPI_Barrier(set->communicator);
+        DEBUG_ALLTOALL_PROFILING("Done MPI_Barrier for print from rank = %i\n", turn);
+        if (my_rank == turn){
+            printf("Buffers for RANK #%i\n", my_rank);
+            print_blocks("SENDBUF to rank #%i  : ", sendbuf,
+                         param_set->send_type_idx, param_set->sendcount, set->count);
+            print_blocks("RECVBUF from rank #%i: ", recvbuf,
+                         param_set->recv_type_idx, param_set->recvcount, set->count);
+            printf("\n");
+            fflush(stdout);
+        }
+        // the sleep keeps consecutive turns apart in the combined output
+        sleep(1);
+    }
+}
+
+// Test driver: test 1 runs MPI_Alltoall once per parameter set with identical arguments on every rank;
+// test 2 runs it once with a per-rank parameter set (my_rank % 4) of equal byte size per block.
+int main(int argc, char *argv[]) {
+    DEBUG_ALLTOALL_PROFILING("in main ...\n", NULL);
+    // Initialise MPI
+    int world_size, my_rank;
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+    DEBUG_ALLTOALL_PROFILING("MPI initialsised: world_size=%i, my_rank=%i\n", world_size, my_rank);
+    // Print the handle value of each MPI datatype used by the tests
+    printf("MPI Datatypes used:\n");
+    for (int i=0; i<4; i++){
+        printf("name, value: %s, %li\n", type_strings[i], (long) MPI_Datatypes_used[i]);
+    }
+
+    // set up alltoall parameter sets and the communicators for them
+    rank_set_t* rank_sets = create_rank_sets();
+
+    DEBUG_ALLTOALL_PROFILING("calling create_communicators ...\n", NULL);
+    create_communicators(world_size, rank_sets, RANK_SETS_COUNT);
+    DEBUG_ALLTOALL_PROFILING("creating alltoall_test_all_node_params_sets ...\n", NULL);
+    alltoall_test_node_params_t* param_sets = alltoall_test_all_node_params_sets(rank_sets);
+    DEBUG_ALLTOALL_PROFILING("returned from alltoall_test_all_node_params_sets()\n", NULL);
+    DEBUG_ALLTOALL_PROFILING("param set [0]:\n", NULL);
+    DEBUG_ALLTOALL_PROFILING(".send_type_idx = %i\n", param_sets[0].send_type_idx);
+    DEBUG_ALLTOALL_PROFILING(".sendcount = %i\n", param_sets[0].sendcount);
+
+    // to test aggregation of patterns this set should have duplicates
+    // int param_sets_indices[] = {2, 1, 2, 1, 1, 2, 2, 1, 2};  // this is the highest level of the pattern of the MPI_alltoall calls - each int here specifies the parameter set to be used in one MPI_alltoall
+    int param_sets_indices[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    int param_sets_set_count = sizeof(param_sets_indices) / sizeof(param_sets_indices[0]);
+    DEBUG_ALLTOALL_PROFILING("created param set indices\n", NULL);
+
+    // test 1: every rank taking part in a call passes the same parameter set
+    if (my_rank == 0){
+        printf("\n\nMPI_Alltoall test with all nodes having same send and receive type\n");
+        fflush(stdout);
+    }
+
+    for (int set_idx=0; set_idx<param_sets_set_count; set_idx++){
+        DEBUG_ALLTOALL_PROFILING("retrieving next parameter set ... *****************\n", NULL);
+        alltoall_test_node_params_t* param_set = &param_sets[param_sets_indices[set_idx]];    // the same parameter set is used for all ranks in the alltoall call
+        DEBUG_ALLTOALL_PROFILING("next parameter set retrieved\n", NULL);
+        DEBUG_ALLTOALL_PROFILING("param set [this set]:\n", NULL);
+        DEBUG_ALLTOALL_PROFILING(".send_type_idx = %i\n", param_set->send_type_idx);
+        DEBUG_ALLTOALL_PROFILING(".sendcount = %i\n", param_set->sendcount);
+
+        // disabled progress message; the arguments are listed in the order the format string expects
+        // if (my_rank == 0){
+        //     printf("Calling MPI_Alltoall with send type = %s , recv type = %s , send count = %i ,  recv count = %i \n", type_strings[param_set->send_type_idx], type_strings[param_set->recv_type_idx], param_set->sendcount, param_set->recvcount);
+        // }
+
+        if (is_rank_in_rankset(my_rank, param_set->rank_set)){
+
+            DEBUG_ALLTOALL_PROFILING("creating sendbuf...\n", NULL);
+            void* sendbuf = create_sendbuf(param_set);
+            DEBUG_ALLTOALL_PROFILING("created sendbuf\n", NULL);
+            void* recvbuf = create_recvbuf(param_set);
+            DEBUG_ALLTOALL_PROFILING("created recvbuf\n", NULL);
+            // signature: MPI_Alltoall( const void* sendbuf , int sendcount , MPI_Datatype sendtype , void* recvbuf , int recvcount , MPI_Datatype recvtype , MPI_Comm comm);
+            DEBUG_ALLTOALL_PROFILING("MPI_UINT8_T = %i\n", MPI_UINT8_T);
+            //MPI_Alltoall(sendbuf, 8 , MPI_UINT8_T , recvbuf , 8 , MPI_UINT8_T , MPI_COMM_WORLD);
+            DEBUG_ALLTOALL_PROFILING("Done basic MPI_Alltoall\n", NULL);
+
+            // test that my rank is one of the communicator used in this call - if not omit this call
+            DEBUG_ALLTOALL_PROFILING("DEBUG driver prog: send type index, value, %i, %i\n", param_set->send_type_idx, MPI_Datatypes_used[param_set->send_type_idx] );
+            MPI_Alltoall(sendbuf, param_set->sendcount , MPI_Datatypes_used[param_set->send_type_idx] , recvbuf , param_set->recvcount , MPI_Datatypes_used[param_set->recv_type_idx] , param_set->rank_set->communicator);
+
+            // make sure only one rank prints at once, using barrier and sleep
+            print_buffers(my_rank, world_size, param_set, sendbuf, recvbuf);
+            // for (int rank=0; rank<world_size; rank++){ 
+            //     MPI_Barrier(param_set->rank_set->communicator);
+            //     DEBUG_ALLTOALL_PROFILING("Done MPI_Barrier for print from rank = %i\n", rank);
+            //     if (my_rank == rank){
+            //         printf("Buffers for RANK #%i\n", my_rank);
+            //         for (int block_idx=0; block_idx<param_set->rank_set->count; block_idx++){
+            //             printf("SENDBUF to rank #%i  : ", block_idx);
+            //             for (int idx=0; idx<param_set->sendcount; idx++){
+            //                 switch (param_set->send_type_idx){
+            //                     case 0:
+            //                         printf(" %02x ", ((uint8_t*)sendbuf)[block_idx * param_set->sendcount + idx]);
+            //                         break;
+            //                     case 1:
+            //                         printf(" %04x ", ((uint16_t*)sendbuf)[block_idx * param_set->sendcount + idx]);
+            //                         break;
+            //                     case 2:
+            //                         printf(" %08x ", ((uint32_t*)sendbuf)[block_idx * param_set->sendcount + idx]);
+            //                         break;
+            //                     case 3:
+            //                         printf(" %016x ", ((uint64_t*)sendbuf)[block_idx * param_set->sendcount + idx]);
+            //                         break;
+            //                 }
+            //             }
+            //             printf("\n");
+            //             fflush(stdout);
+            //         }
+            //         for (int block_idx=0; block_idx<param_set->rank_set->count; block_idx++){
+            //             printf("RECVBUF from rank #%i: ", block_idx);
+            //             for (int idx=0; idx<param_set->recvcount; idx++){
+            //                 switch (param_set->recv_type_idx){
+            //                     case 0:
+            //                         printf(" %02x ", ((uint8_t*)recvbuf)[block_idx * param_set->recvcount + idx]);
+            //                         break;
+            //                     case 1:
+            //                         printf(" %04x ", ((uint16_t*)recvbuf)[block_idx * param_set->recvcount + idx]);
+            //                         break;
+            //                     case 2:
+            //                         printf(" %08x ", ((uint32_t*)recvbuf)[block_idx * param_set->recvcount + idx]);
+            //                         break;
+            //                     case 3:
+            //                         printf(" %016x ", ((uint64_t*)recvbuf)[block_idx * param_set->recvcount + idx]);
+            //                         break;
+            //                 }
+            //             }
+            //             printf("\n");
+            //             fflush(stdout);
+            //         }
+            //         printf("\n");
+            //         fflush(stdout);
+            //     }
+            //     sleep(1.0);            
+            // }
+            free(recvbuf);
+            free(sendbuf);
+        }
+    }
+    fflush(stdout);
+
+
+    // test 2 - mix up send and recv counts while keeping block transferred byte size = const.
+    if (my_rank == 0){
+        printf("\n\nMPI_Alltoall test with nodes having differnt send and receive type\n");
+        fflush(stdout);
+    }
+    free(param_sets);  // the table used by test 1 is no longer needed
+    param_sets = alltoall_test_individual_node_params_sets(rank_sets);
+    // in this test the param set is dependent on the rank
+    alltoall_test_node_params_t* param_set = &param_sets[my_rank % 4];  // as we have 4 paramsets to choose from 
+
+    if (is_rank_in_rankset(my_rank, param_set->rank_set)){  // a precaution - using all 8 ranks for this test
+        printf("Creating buffer in rank %i\n", my_rank);
+        fflush(stdout);
+        DEBUG_ALLTOALL_PROFILING("creating sendbuf...\n", NULL);
+        void* sendbuf = create_sendbuf(param_set);
+        DEBUG_ALLTOALL_PROFILING("created sendbuf\n", NULL);
+        void* recvbuf = create_recvbuf(param_set);
+        DEBUG_ALLTOALL_PROFILING("created recvbuf\n", NULL);
+        // signature: MPI_Alltoall( const void* sendbuf , int sendcount , MPI_Datatype sendtype , void* recvbuf , int recvcount , MPI_Datatype recvtype , MPI_Comm comm);
+        DEBUG_ALLTOALL_PROFILING("MPI_UINT8_T = %i\n", MPI_UINT8_T);
+        //MPI_Alltoall(sendbuf, 8 , MPI_UINT8_T , recvbuf , 8 , MPI_UINT8_T , MPI_COMM_WORLD);
+        DEBUG_ALLTOALL_PROFILING("Done basic MPI_Alltoall\n", NULL);
+
+        // test that my rank is one of the communicator used in this call - if not omit this call
+        DEBUG_ALLTOALL_PROFILING("DEBUG driver prog: send type index, value, %i, %i\n", param_set->send_type_idx, MPI_Datatypes_used[param_set->send_type_idx] );
+        // note for next line - paramset has been prepared with all items using the same rankset of all 8 nodes
+        // printf("Calling alltoall in rank %i\n", my_rank);
+        // fflush(stdout);        
+        MPI_Alltoall(sendbuf, param_set->sendcount , MPI_Datatypes_used[param_set->send_type_idx] , recvbuf , param_set->recvcount , MPI_Datatypes_used[param_set->recv_type_idx] , param_set->rank_set->communicator);
+        printf("Returned from alltoall in rank %i\n", my_rank);
+        free(recvbuf);
+        free(sendbuf);
+    }
+    free(param_sets);
+    free(rank_sets);
+    MPI_Finalize();
+    return 0;
+}
\ No newline at end of file
diff --git a/src/alltoall/Makefile b/src/alltoall/Makefile
index 6f46db05..f7c704b7 100644
--- a/src/alltoall/Makefile
+++ b/src/alltoall/Makefile
@@ -11,31 +11,31 @@ include ../makefile_common.mk
 
 all: liballtoall.so liballtoall_location.so liballtoall_counts.so liballtoall_late_arrival.so liballtoall_exec_timings.so liballtoall_backtrace.so
 
-liballtoall_counts.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o mpi_alltoall.c alltoall_profiler.h
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o  mpi_alltoall.c -o liballtoall_counts.so
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o mpi_alltoall.c -o liballtoall_counts_unequal.so
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o  mpi_alltoall.c -o liballtoall_counts_compact.so
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o mpi_alltoall.c -o liballtoall_counts_unequal_compact.so
-
-liballtoall_exec_timings.so: ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o  mpi_alltoall.c alltoall_profiler.h
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o mpi_alltoall.c -o liballtoall_exec_timings.so
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o mpi_alltoall.c -o liballtoall_exec_timings_counts_unequal.so
-
-liballtoall_late_arrival.so: ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o mpi_alltoall.c alltoall_profiler.h
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o mpi_alltoall.c -o liballtoall_late_arrival.so
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o  mpi_alltoall.c -o liballtoall_late_arrival_counts_unequal.so
-
-liballtoall_backtrace.so: ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o mpi_alltoall.c alltoall_profiler.h
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o mpi_alltoall.c -o liballtoall_backtrace.so
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o mpi_alltoall.c -o liballtoall_backtrace_counts_unequal.so
-
-liballtoall_location.so: ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o mpi_alltoall.c alltoall_profiler.h
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o mpi_alltoall.c -o liballtoall_location.so
-	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o mpi_alltoall.c -o liballtoall_location_counts_unequal.so
-
-liballtoall.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o mpi_alltoall.c alltoall_profiler.h
-	mpicc -I../ -I../common/ -g -shared -fPIC ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o mpi_alltoall.c -o liballtoall.so
-	mpicc -I../ -I../common/ -g -shared -fPIC -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o mpi_alltoall.c -o liballtoall_counts_unequal.so
+liballtoall_counts.so: ${COMMON_OBJECTS} mpi_alltoall.c alltoall_profiler.h
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 ../common/logger_counts.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts.so
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ../common/logger_counts.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts_unequal.so
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 ../common/logger_counts.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts_compact.so
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ../common/logger_counts.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts_unequal_compact.so
+
+liballtoall_exec_timings.so: ${COMMON_OBJECTS} ../common/logger_exec_timings.o  mpi_alltoall.c alltoall_profiler.h
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/logger_exec_timings.o mpi_alltoall.c -o liballtoall_exec_timings.so
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_exec_timings.o mpi_alltoall.c -o liballtoall_exec_timings_counts_unequal.so
+
+liballtoall_late_arrival.so: ${COMMON_OBJECTS} ../common/logger_late_arrival_timings.o mpi_alltoall.c alltoall_profiler.h
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 ../common/logger_late_arrival_timings.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_late_arrival.so
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ../common/logger_late_arrival_timings.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_late_arrival_counts_unequal.so
+
+liballtoall_backtrace.so: ${COMMON_OBJECTS} mpi_alltoall.c alltoall_profiler.h
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_backtrace.so
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_backtrace_counts_unequal.so
+
+liballtoall_location.so: ${COMMON_OBJECTS} ../common/logger_location.o mpi_alltoall.c alltoall_profiler.h
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 ../common/logger_location.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_location.so
+	mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ../common/logger_location.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_location_counts_unequal.so
+
+liballtoall.so: ${COMMON_OBJECTS} ../common/logger.o mpi_alltoall.c alltoall_profiler.h
+	mpicc -I../ -I../common/ -g -shared -fPIC ../common/logger.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall.so
+	mpicc -I../ -I../common/ -g -shared -fPIC ../common/logger.o -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts_unequal.so
 
 check: all 
 
diff --git a/tools/Makefile b/tools/Makefile
index d5a89cc3..9089f4bb 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -1,6 +1,6 @@
 # Copyright (c) 2020-2021 NVIDIA CORPORATION. All rights reserved.
 
-all: webui map profile sendprofiler recvprofiler getcounters validate srcountsanalyzer analyzetimings analyzebacktraces getcalldata getbins profilerrun
+all: webui map profile sendprofiler recvprofiler getcounters validate srcountsanalyzer analyzetimings analyzebacktraces getcalldata getbins profilerrun png
 
 profilerrun:
 	cd cmd/profilerrun; go build profilerrun.go
@@ -8,6 +8,9 @@ profilerrun:
 webui:
 	cd cmd/webui; go build webui.go
 
+png:
+	cd cmd/png; go build png.go
+
 map:
 	cd cmd/map; go build map.go
 
diff --git a/tools/cmd/png/png.go b/tools/cmd/png/png.go
new file mode 100644
index 00000000..032f25b3
--- /dev/null
+++ b/tools/cmd/png/png.go
@@ -0,0 +1,245 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"image"
+	"image/color"
+	"image/png"
+	"io/ioutil"
+	"math"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+const max_ranks = 200   // side length of the fixed [max_ranks][max_ranks]int matrices used for per-rank counts
+const max_patterns = 30 // length of the weight array that main fills, one entry per pattern
+const width = 10        // each matrix cell is drawn as a width x width pixel square in the generated PNGs
+
+var ranks int // rank count taken from the last parsed pattern: written by pattern_to_sz, read by sz_to_png
+
+// TestStringList is a list of raw pattern texts. Its sort.Interface methods order the
+// patterns by descending weight, i.e. by the value weight_from_pattern extracts from each.
+type TestStringList []string
+
+// Len reports how many patterns the list holds.
+func (list TestStringList) Len() int {
+	return len(list)
+}
+
+// Less reports whether the pattern at i weighs strictly more than the one at j.
+func (list TestStringList) Less(i, j int) bool {
+	return weight_from_pattern(list[i]) > weight_from_pattern(list[j])
+}
+
+// Swap exchanges the patterns at positions i and j.
+func (list TestStringList) Swap(i, j int) {
+	list[j], list[i] = list[i], list[j]
+}
+
+// weight_from_pattern returns N from the "Count: N calls" text of a pattern, or 0 if that text is missing or N is not an integer.
+func weight_from_pattern(str string) int {
+	res, _ := strconv.Atoi(strings.Split(strings.Split(str+"Count: ", "Count: ")[1], " calls")[0]) // appended marker keeps [1] in range
+	return res
+}
+
+// pattern_to_sz parses one pattern's text into a matrix: res[i][j] sums the j-th count of every
+// "Rank(s) <ids>: <counts>" entry whose ids (values or "a-b" ranges, comma separated) include rank i.
+// The "Number of ranks: " value, capped at max_ranks, is also stored in the package variable ranks.
+func pattern_to_sz(str string) [max_ranks][max_ranks]int {
+	var res [max_ranks][max_ranks]int
+	totalRank, _ := strconv.Atoi(strings.Split(strings.Split(str+"Number of ranks: ", "Number of ranks: ")[1], "\n")[0]) // appended marker keeps [1] in range
+	if totalRank > max_ranks {
+		totalRank = max_ranks // the fixed-size matrices cannot hold more ranks
+	}
+	ranks = totalRank
+	for _, item := range strings.Split(str, "Rank(s) ")[1:] {
+		part := strings.Split(item, ":")
+		if len(part) < 2 {
+			continue // a rank list without counts contributes nothing
+		}
+		var from [max_ranks]bool
+		for _, x := range strings.Split(part[0], ",") {
+			bounds := strings.SplitN(strings.TrimSpace(x), "-", 2)
+			lo, errLo := strconv.Atoi(bounds[0])
+			hi, errHi := strconv.Atoi(bounds[len(bounds)-1])
+			if errLo != nil || errHi != nil {
+				continue // skip malformed ids rather than treating them as rank 0
+			}
+			for i := lo; i <= hi && i < totalRank; i++ {
+				from[i] = true
+			}
+		}
+		dest := strings.Fields(part[1])
+		for j := 0; j < len(dest) && j < max_ranks; j++ {
+			tmpx, _ := strconv.Atoi(dest[j]) // parsed once per column, then added to every selected row
+			for i := 0; i < totalRank; i++ {
+				if from[i] {
+					res[i][j] += tmpx
+				}
+			}
+		}
+	}
+	return res
+}
+
+// num_to_rgb_8color maps a data volume to one of eight fixed colors, one per power-of-ten band (task3).
+func num_to_rgb_8color(num int) [3]uint8 {
+	switch {
+	case num == 0:
+		return [3]uint8{255, 255, 255} // white
+	case num <= 10:
+		return [3]uint8{255, 255, 0} // yellow
+	case num <= 100:
+		return [3]uint8{255, 165, 0} // orange
+	case num <= 1000:
+		return [3]uint8{0, 255, 0} // green
+	case num <= 10000:
+		return [3]uint8{255, 0, 0} // red
+	case num <= 100000:
+		return [3]uint8{160, 32, 240} // purple
+	case num <= 1000000:
+		return [3]uint8{165, 42, 42} // brown
+	}
+	return [3]uint8{0, 0, 0} // black: anything above 1000000
+}
+
+// num_to_low returns the largest power of ten that does not exceed num, or 1 when num is below 10
+// (including zero and negative values). Integer division keeps exact powers of ten exact.
+func num_to_low(num int) int {
+	res := 1
+	for ; num >= 10; num /= 10 {
+		res *= 10
+	}
+	return res
+}
+
+// num_to_rgb_linear maps a data volume to a gray level on a linear scale (task5, first method):
+// white at 0, one step darker per 4000 units, black from 255*4000 upward.
+func num_to_rgb_linear(num int) [3]uint8 {
+	level := 255 - num/4000
+	if level < 0 {
+		level = 0
+	}
+	return [3]uint8{uint8(level), uint8(level), uint8(level)}
+}
+
+// num_to_rgb_Logarithmic maps a data volume to a gray level on a log10 scale (task5, second
+// method): 0 gives white (255) and the shade darkens by 42.66 per decade of num+1, bottoming
+// out at black (0) once log10(num+1)*42.66 exceeds 255. The fractional part of the shade is
+// dropped by the conversion to uint8.
+func num_to_rgb_Logarithmic(num int) [3]uint8 {
+	darkness := math.Log10(float64(num+1)) * 42.66
+	shade := uint8(255 - math.Min(darkness, 255))
+	return [3]uint8{shade, shade, shade}
+}
+
+// num_to_rgb_own blends the 8-color palette (task5, third method): num is placed between the power
+// of ten below it and the next one, and their two colors are mixed linearly by num's position.
+func num_to_rgb_own(num int) [3]uint8 {
+	low := num_to_low(num)
+	high := 10 * low
+	if low == 1 {
+		low = 0 // the lowest band starts at 0 rather than 1
+	}
+	from, to := num_to_rgb_8color(low), num_to_rgb_8color(high)
+	nearLow, nearHigh, span := float64(high-num), float64(num-low), float64(high-low)
+	var resrgb [3]uint8
+	for c := range resrgb {
+		resrgb[c] = uint8((float64(from[c])*nearLow + float64(to[c])*nearHigh) / span)
+	}
+	return resrgb
+}
+
+// sz_to_png renders the top-left ranks x ranks cells of sz to a PNG file at path, one width x width
+// pixel square per cell. mode selects the color mapping (0: 8 colors, 1: linear, 2: log, 3: blended).
+func sz_to_png(sz [max_ranks][max_ranks]int, path string, mode int) {
+	file, err := os.Create(path)
+	if err != nil {
+		fmt.Println(err)
+		return // without a file there is nothing to encode into
+	}
+	defer file.Close()
+	rgba := image.NewRGBA(image.Rect(0, 0, ranks*width, ranks*width))
+	for x := 0; x < ranks*width; x++ {
+		for y := 0; y < ranks*width; y++ {
+			var rgb [3]uint8 // stays black when mode is none of 0-3
+			if mode == 0 {
+				rgb = num_to_rgb_8color(sz[x/width][y/width])
+			} else if mode == 1 {
+				rgb = num_to_rgb_linear(sz[x/width][y/width])
+			} else if mode == 2 {
+				rgb = num_to_rgb_Logarithmic(sz[x/width][y/width])
+			} else if mode == 3 {
+				rgb = num_to_rgb_own(sz[x/width][y/width])
+			}
+			rgba.Set(x, y, color.RGBA{rgb[0], rgb[1], rgb[2], 255})
+		}
+	}
+	if err = png.Encode(file, rgba); err != nil {
+		fmt.Println(err)
+	}
+}
+// main reads the pattern file named by the first argument, sorts its patterns heaviest first, writes
+// "<index> <weight>" lines for up to max_patterns of them to weight.txt next to the input, then renders
+// four PNGs per pattern for the first 11 patterns and four more (task4*) for their weighted sum.
+func main() {
+	if len(os.Args) < 2 {
+		fmt.Println("No input file!")
+		return
+	}
+	filein := os.Args[1]
+	lastpiepos := strings.LastIndex(filein, "/")
+	if lastpiepos == -1 {
+		fmt.Println("Can't guess path of input file")
+		return
+	}
+	filepath := filein[:lastpiepos]
+	data, err := ioutil.ReadFile(filein)
+	if err != nil {
+		fmt.Println("File reading error", err)
+		return
+	}
+	if len(data) == 0 {
+		fmt.Println("Input file is empty")
+		return
+	}
+	// Drop the first byte (presumably a leading '#' - TODO confirm) and split the rest into patterns.
+	pattern := strings.Split(string(data)[1:], "#")
+	sort.Sort(TestStringList(pattern))
+	// O_TRUNC keeps stale bytes from an earlier, longer weight.txt out of the result.
+	outputFile, outputError := os.OpenFile(filepath+"/weight.txt", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
+	if outputError != nil {
+		fmt.Println(outputError)
+		return
+	}
+	defer outputFile.Close()
+	outputWriter := bufio.NewWriter(outputFile)
+	var weight [max_patterns]int
+	for i := 0; i < len(pattern) && i < max_patterns; i++ { // never index past the weight array
+		weight[i] = weight_from_pattern(pattern[i])
+		outputWriter.WriteString(strconv.Itoa(i) + " " + strconv.Itoa(weight[i]) + "\n")
+	}
+	if err = outputWriter.Flush(); err != nil {
+		fmt.Println(err) // Flush also reports an earlier failed WriteString
+	}
+	var allsz [max_ranks][max_ranks]int
+	for i := 0; i < len(pattern) && i <= 10; i++ {
+		sz := pattern_to_sz(pattern[i])
+		for x := 0; x < max_ranks; x++ {
+			for y := 0; y < max_ranks; y++ {
+				allsz[x][y] += sz[x][y] * weight[i]
+			}
+		}
+		sz_to_png(sz, filepath+"/"+strconv.Itoa(i)+"_task3.png", 0)
+		sz_to_png(sz, filepath+"/"+strconv.Itoa(i)+"_task5_linear.png", 1)
+		sz_to_png(sz, filepath+"/"+strconv.Itoa(i)+"_task5_log.png", 2)
+		sz_to_png(sz, filepath+"/"+strconv.Itoa(i)+"_task5_own.png", 3)
+	}
+	sz_to_png(allsz, filepath+"/task4.png", 0)
+	sz_to_png(allsz, filepath+"/task4_linear.png", 1)
+	sz_to_png(allsz, filepath+"/task4_log.png", 2)
+	sz_to_png(allsz, filepath+"/task4_own.png", 3)
+}
diff --git a/tools/cmd/validate/validate.go b/tools/cmd/validate/validate.go
index 154b66b6..31cdecae 100644
--- a/tools/cmd/validate/validate.go
+++ b/tools/cmd/validate/validate.go
@@ -50,38 +50,11 @@ const (
 	exampleBinaryBigCountsC = "alltoallv_bigcounts_c"
 	exampleBinaryDatatypeC  = "alltoallv_dt_c"
 
-	sharedLibAlltoAllUnequalCounts        = "liballtoall_counts_unequal.so"
-	sharedLibAlltoAllUnequalCountsCompact = "liballtoall_counts_unequal_compact.so" // an extra one compared to alltoallv ones above
-	sharedLibAlltoAllUnequalBacktrace     = "liballtoall_backtrace_counts_unequal.so"
-	sharedLibAlltoAllUnequalLocation      = "liballtoall_location_counts_unequal.so"
-	sharedLibAlltoAllUnequalLateArrival   = "liballtoall_late_arrival_counts_unequal.so"
-	sharedLibAlltoAllUnequalA2ATime       = "liballtoall_exec_timings_counts_unequal.so"
-
-	exampleFileAlltoallSimpleC    = "alltoall_simple_c.c" // TODO add some rows for other alltoall test programs - each will need a test struct below
-	exampleFileAlltoallBigcountsC = "alltoall_bigcounts_c.c"
-	exampleFileAlltoallMulticommC = "alltoall_multicomms_c.c"
-	exampleFileAlltoallDatatypeC  = "alltoall_dt_c.c"
-
-	exampleBinaryAlltoallSimpleC    = "alltoall_simple_c"
-	exampleBinaryAlltoallBigcountsC = "alltoall_bigcounts_c"
-	exampleBinaryAlltoallMulticommC = "alltoall_multicomms_c"
-	exampleBinaryAlltoallDatatypeC  = "alltoall_dt_c"
-
 	expectedIndexPageFile = "common_expected_index.html"
-
-	noValidationStep              = 0
-	allValidationSteps            = 1
-	traceGenerationStep           = 2
-	postmortemSRCountAnalyzerStep = 3
-	postmortemProfilerStep        = 4
-	webuiStep                     = 5
 )
 
 // Test gathers all the information required to run a specific test
 type Test struct {
-	collective                     string
-	requestedValidationStepsToRun  []int
-	validationStepsToRun           map[int]bool
 	np                             int
 	source                         string
 	binary                         string
@@ -107,41 +80,7 @@ type Test struct {
 
 type testCfg struct {
 	tempDir string
-	cfg     *Test
-}
-
-type validationCfg struct {
-	sharedLibraries []string
-	tests           []Test
-	testCfgs        map[string]*testCfg
-}
-
-func (v *validationCfg) updateValidationStepsDependencies() {
-	for _, tt := range v.tests {
-		v.testCfgs[tt.binary].cfg.validationStepsToRun = make(map[int]bool)
-
-		for _, step := range v.testCfgs[tt.binary].cfg.requestedValidationStepsToRun {
-			if step == allValidationSteps {
-				for i := 0; i <= webuiStep; i++ {
-					v.testCfgs[tt.binary].cfg.validationStepsToRun[i] = true
-				}
-			}
-
-			if step == webuiStep {
-				v.testCfgs[tt.binary].cfg.validationStepsToRun[postmortemProfilerStep] = true
-			}
-
-			if step == postmortemProfilerStep || step == postmortemSRCountAnalyzerStep {
-				v.testCfgs[tt.binary].cfg.validationStepsToRun[traceGenerationStep] = true
-			}
-
-			v.testCfgs[tt.binary].cfg.validationStepsToRun[step] = true
-		}
-	}
-}
-
-func validationStepIsSet(tt *Test, requestedStep int) bool {
-	return tt.validationStepsToRun[requestedStep]
+	cfg     Test
 }
 
 func validateCountProfiles(dir string, jobid int, id int) error {
@@ -173,7 +112,7 @@ func checkOutputFiles(expectedOutputDir string, tempDir string, expectedFiles []
 		}
 		if hashRefFile != hashResultFile {
 			fmt.Println(" failed")
-			return fmt.Errorf("invalid output, send counters do not match (%s vs. %s)", resultFile, referenceFile)
+			return fmt.Errorf("Invalid output, send counters do not match (%s vs. %s)", resultFile, referenceFile)
 		}
 		fmt.Println(" ok")
 	}
@@ -181,7 +120,7 @@ func checkOutputFiles(expectedOutputDir string, tempDir string, expectedFiles []
 	return nil
 }
 
-func checkFormatTimingFile(filepath string, codeBaseDir string, expectedNumCalls int, expectedNumRanks int, tt *Test) error {
+func checkFormatTimingFile(filepath string, codeBaseDir string, expectedNumCalls int, expectedNumRanks int, tt Test) error {
 	md, _, _, err := timings.ParseTimingFile(filepath, codeBaseDir)
 	if err != nil {
 		return fmt.Errorf("timings.ParseTimingFile(() failed: %s", err)
@@ -195,7 +134,7 @@ func checkFormatTimingFile(filepath string, codeBaseDir string, expectedNumCalls
 	return nil
 }
 
-func checkOutput(codeBaseDir string, tempDir string, tt *Test) error {
+func checkOutput(codeBaseDir string, tempDir string, tt Test) error {
 	expectedOutputDir := filepath.Join(codeBaseDir, "tests", tt.binary, "expectedOutput")
 
 	fmt.Printf("Checking if %s exist(s)...\n", tt.expectedSendCompactCountsFiles)
@@ -368,31 +307,25 @@ func validateDatasetProfiler(codeBaseDir string, collectiveName string, testCfg
 }
 
 func validateTestPostmortemResults(codeBaseDir string, collectiveName string, testCfg *testCfg) error {
-	if validationStepIsSet(testCfg.cfg, postmortemSRCountAnalyzerStep) {
-		err := validateTestSRCountsAnalyzer(testCfg.cfg.binary, testCfg.tempDir)
-		if err != nil {
-			return err
-		}
+	err := validateTestSRCountsAnalyzer(testCfg.cfg.binary, testCfg.tempDir)
+	if err != nil {
+		return err
 	}
 
-	if validationStepIsSet(testCfg.cfg, postmortemProfilerStep) {
-		err := validateDatasetProfiler(codeBaseDir, collectiveName, testCfg)
-		if err != nil {
-			return err
-		}
+	err = validateDatasetProfiler(codeBaseDir, collectiveName, testCfg)
+	if err != nil {
+		return err
 	}
 
 	return nil
 }
 
-func (v *validationCfg) postmortemAnalysisTools(codeBaseDir string, collectiveName string) error {
-	for source, testCfg := range v.testCfgs {
-		if validationStepIsSet(testCfg.cfg, postmortemSRCountAnalyzerStep) || validationStepIsSet(testCfg.cfg, postmortemProfilerStep) {
-			err := validateTestPostmortemResults(codeBaseDir, collectiveName, testCfg)
-			if err != nil {
-				fmt.Printf("validation of the postmortem analysis for %s in %s failed: %s\n", source, testCfg.tempDir, err)
-				return err
-			}
+func validatePostmortemAnalysisTools(codeBaseDir string, collectiveName string, profilerResults map[string]*testCfg) error {
+	for source, testCfg := range profilerResults {
+		err := validateTestPostmortemResults(codeBaseDir, collectiveName, testCfg)
+		if err != nil {
+			fmt.Printf("validation of the postmortem analysis for %s in %s failed: %s\n", source, testCfg.tempDir, err)
+			return err
 		}
 	}
 
@@ -407,7 +340,7 @@ func compareResultWithFileContent(filePath string, content string) (bool, error)
 	expectedContent := string(data)
 
 	if content != expectedContent {
-		fmt.Printf("the content returned when accessing the page does not match expectation:\n%s\nvs.\n%s", content, expectedContent)
+		fmt.Printf("the content returned when accessing the index page does not match expectation:\n%s\nvs.\n%s", content, expectedContent)
 		return false, nil
 	}
 	return true, nil
@@ -459,9 +392,6 @@ func validateIndexPage(codeBaseDir string, cfg *webui.Config) error {
 	defer resp.Body.Close()
 
 	body, err := ioutil.ReadAll(resp.Body)
-	if err != nil {
-		return err
-	}
 	bs := string(body)
 	return checkIndexPageContent(codeBaseDir, bs)
 }
@@ -490,9 +420,6 @@ func validateCallPage(codeBaseDir string, cfg *webui.Config, testCfg *testCfg) e
 	defer resp.Body.Close()
 
 	body, err := ioutil.ReadAll(resp.Body)
-	if err != nil {
-		return err
-	}
 	bs := string(body)
 
 	return checkCallPageContent(codeBaseDir, testCfg, bs)
@@ -508,9 +435,6 @@ func validatePatternsPage(codeBaseDir string, cfg *webui.Config, testCfg *testCf
 	defer resp.Body.Close()
 
 	body, err := ioutil.ReadAll(resp.Body)
-	if err != nil {
-		return err
-	}
 	bs := string(body)
 
 	return checkPatternsPageContent(codeBaseDir, testCfg, bs)
@@ -574,198 +498,30 @@ func validateWebUIForTest(codeBaseDir string, testCfg *testCfg, port int) error
 	return nil
 }
 
-func (v *validationCfg) webUI(codeBaseDir string, collectiveName string) error {
+func validateWebUI(codeBaseDir string, collectiveName string, profilerResults map[string]*testCfg) error {
 	fmt.Println("- Validating the webUI")
 	port := webui.DefaultPort
 
-	for _, testCfg := range v.testCfgs {
-		if validationStepIsSet(testCfg.cfg, webuiStep) {
-			err := validateWebUIForTest(codeBaseDir, testCfg, port)
-			if err != nil {
-				return fmt.Errorf("validateWebUIForTest() failed: %s", err)
-			}
-			port++
+	for _, testCfg := range profilerResults {
+		err := validateWebUIForTest(codeBaseDir, testCfg, port)
+		if err != nil {
+			return fmt.Errorf("validateWebUIForTest() failed: %s", err)
 		}
+		port++
 	}
 
 	return nil
 }
 
-// profiler runs the profiler against examples and compare the resuls to the results output.
+// validateProfiler runs the profiler against examples and compare the resuls to the results output.
 // If keepResults is set to true, the results are *not* removed after execution. They can then be used
 // later on to validate postmortem analysis.
-func (v *validationCfg) profiler(keepResults bool, fullValidation bool) error {
-	_, filename, _, _ := runtime.Caller(0)
-	codeBaseDir := filepath.Join(filepath.Dir(filename), "..", "..", "..")
-
-	// Find MPI
-	mpiBin, err := exec.LookPath("mpirun")
-	if err != nil {
-		return err
-	}
-
-	// Find make
-	makeBin, err := exec.LookPath("make")
-	if err != nil {
-		return err
-	}
-
-	// Compile both the profiler libraries and the example
-	log.Println("Building libraries and tests...")
-	cmd := exec.Command(makeBin, "clean", "all")
-	cmd.Dir = filepath.Join(codeBaseDir, "src", "alltoallv")
-	err = cmd.Run()
-	if err != nil {
-		return err
-	}
-
-	cmd = exec.Command(makeBin, "clean", "all")
-	cmd.Dir = filepath.Join(codeBaseDir, "examples")
-	err = cmd.Run()
-	if err != nil {
-		return err
-	}
-
-	for testName, tt := range v.testCfgs {
-		// Create a temporary directory where to store the results
-		tempDir, err := ioutil.TempDir("", "")
-		if err != nil {
-			return err
-		}
-		v.testCfgs[testName].tempDir = tempDir
-
-		// Run the profiler
-		// todo: use https://github.com/gvallee/go_hpc_jobmgr so we can easilty validate on local machine and clusters
-		if validationStepIsSet(tt.cfg, traceGenerationStep) {
-			var stdout, stderr bytes.Buffer
-			for _, lib := range v.sharedLibraries {
-				pathToLib := filepath.Join(codeBaseDir, "src", tt.cfg.collective, lib)
-				fmt.Printf("Running MPI application (%s) and gathering profiles with %s...\n", testName, pathToLib)
-				cmd = exec.Command(mpiBin, "-np", strconv.Itoa(tt.cfg.np), "--oversubscribe", filepath.Join(codeBaseDir, "examples", testName))
-				cmd.Env = append(os.Environ(),
-					"LD_PRELOAD="+pathToLib,
-					"A2A_PROFILING_OUTPUT_DIR="+tempDir)
-				cmd.Dir = tempDir
-				cmd.Stdout = &stdout
-				cmd.Stderr = &stderr
-				err = cmd.Run()
-				if err != nil {
-					return fmt.Errorf("mpirun failed.\n\tstdout: %s\n\tstderr: %s", stdout.String(), stderr.String())
-				}
-			}
-
-			// Check the results
-			err = checkOutput(codeBaseDir, tempDir, tt.cfg)
-			if err != nil {
-				return err
-			}
-
-			// We clean up *only* when tests are successful and
-			// if results do not need to be kept
-			if !keepResults {
-				os.RemoveAll(tempDir)
-			}
-		}
-	}
-
-	return nil
-}
-
-func main() {
-	verbose := flag.Bool("v", false, "Enable verbose mode")
-	counts := flag.Bool("counts", false, "Validate the count data generated during the validation run of the profiler with an MPI application. Requires the following additional options: -dir, -job, -id.")
-	profilerValidation := flag.Bool("profiler", false, "Perform a validation of the profiler itself running various tests. Requires MPI. Does not require any additional option.")
-	postmortemValidation := flag.Bool("postmortem", false, "Perform a validation of the postmortem analysis tools.")
-	full := flag.Bool("full", true, "Run the full validation. WARNING! This may generate a huge amount of files and create file system issues!")
-	dir := flag.String("dir", "", "Where all the data is")
-	id := flag.Int("id", 0, "Identifier of the experiment, e.g., X from <pidX> in the profile file name")
-	jobid := flag.Int("jobid", 0, "Job ID associated to the count files")
-	help := flag.Bool("h", false, "Help message")
-	webui := flag.Bool("webui", false, "Validate the WebUI")
-
-	flag.Parse()
-
+func validateProfiler(keepResults bool, fullValidation bool) (map[string]*testCfg, error) {
 	defaultListGraphs := fmt.Sprintf("0-%d", profiler.DefaultNumGeneratedGraphs)
 	bigListGraphs := "0-999"
-	sharedLibraries := []string{sharedLibCounts, sharedLibBacktrace, sharedLibLocation, sharedLibLateArrival, sharedLibA2ATime,
-		sharedLibAlltoAllUnequalCounts, sharedLibAlltoAllUnequalCountsCompact, sharedLibAlltoAllUnequalBacktrace,
-		sharedLibAlltoAllUnequalLocation, sharedLibAlltoAllUnequalLateArrival, sharedLibAlltoAllUnequalA2ATime}
+	sharedLibraries := []string{sharedLibCounts, sharedLibBacktrace, sharedLibLocation, sharedLibLateArrival, sharedLibA2ATime}
 	validationTests := []Test{
 		{
-			collective:                     "alltoall",
-			requestedValidationStepsToRun:  []int{traceGenerationStep},
-			np:                             4,
-			totalNumCalls:                  1,
-			numCallsPerComm:                []int{1},
-			numRanksPerComm:                []int{4},
-			source:                         exampleFileAlltoallSimpleC,
-			binary:                         exampleBinaryAlltoallSimpleC,
-			expectedSendCompactCountsFiles: []string{"send-counters.job0.rank0.txt"},
-			expectedRecvCompactCountsFiles: []string{"recv-counters.job0.rank0.txt"},
-			// todo: expectedCountsFiles
-			expectedLocationFiles:    []string{"alltoall_locations_comm0_rank0.md"},
-			expectedExecTimeFiles:    []string{"alltoall_execution_times.rank0_comm0_job0.md"},
-			expectedLateArrivalFiles: []string{"alltoall_late_arrival_times.rank0_comm0_job0.md"},
-			expectedBacktraceFiles:   []string{"alltoall_backtrace_rank0_trace0.md"}, // TODO What about an entry for "alltoall_comm_data_rank0.md", "counts.rank0_call0.md" and "counts.rank0_call0.md"???
-			//profilerStepsToExecute:         profiler.AllSteps,	//??? What is this
-		},
-		{
-			collective:                     "alltoall",
-			requestedValidationStepsToRun:  []int{traceGenerationStep},
-			np:                             4,
-			totalNumCalls:                  1000,
-			numCallsPerComm:                []int{1000},
-			numRanksPerComm:                []int{4},
-			source:                         exampleFileAlltoallBigcountsC,
-			binary:                         exampleBinaryAlltoallBigcountsC,
-			expectedSendCompactCountsFiles: []string{"send-counters.job0.rank0.txt"},
-			expectedRecvCompactCountsFiles: []string{"recv-counters.job0.rank0.txt"},
-			// todo: expectedCountsFiles
-			expectedLocationFiles:    []string{"alltoall_locations_comm0_rank0.md"},
-			expectedExecTimeFiles:    []string{"alltoall_execution_times.rank0_comm0_job0.md"},
-			expectedLateArrivalFiles: []string{"alltoall_late_arrival_times.rank0_comm0_job0.md"},
-			expectedBacktraceFiles:   []string{"alltoall_backtrace_rank0_trace0.md"}, // TODO What about an entry for "alltoall_comm_data_rank0.md", "counts.rank0_call0.md" and "counts.rank0_call0.md"???
-			//profilerStepsToExecute:         profiler.AllSteps,	//??? What is this
-		},
-		/* This test does not pass validation yet
-		{
-			collective:                     "alltoall",
-			requestedValidationStepsToRun:  []int{traceGenerationStep},
-			np:                             4,
-			totalNumCalls:                  2,
-			numCallsPerComm:                []int{1, 1}, // 1, 2, 2, 1, 1, 3, 3},
-			numRanksPerComm:                []int{4, 3}, // 3, 3, 3, 2, 2, 2, 2},
-			source:                         exampleFileAlltoallMulticommC,
-			binary:                         exampleBinaryAlltoallMulticommC,
-			expectedSendCompactCountsFiles: []string{"send-counters.job0.rank0.txt", "send-counters.job0.rank1.txt"},
-			expectedRecvCompactCountsFiles: []string{"recv-counters.job0.rank0.txt", "recv-counters.job0.rank1.txt"},
-			// todo: expectedCountsFiles
-			expectedLocationFiles:    []string{"alltoall_locations_comm0_rank0.md", "alltoall_locations_comm0_rank0.md"},
-			expectedExecTimeFiles:    []string{"alltoall_execution_times.rank0_comm0_job0.md", "alltoall_execution_times.rank1_comm0_job0.md"},
-			expectedLateArrivalFiles: []string{"alltoall_late_arrival_times.rank0_comm0_job0.md", "alltoall_late_arrival_times.rank1_comm0_job0.md"},
-			expectedBacktraceFiles:   []string{"alltoall_backtrace_rank0_trace0.md", "alltoall_backtrace_rank1_trace0.md"},
-		},
-		*/
-		{
-			collective:                     "alltoall",
-			requestedValidationStepsToRun:  []int{traceGenerationStep},
-			np:                             4,
-			totalNumCalls:                  4,
-			numCallsPerComm:                []int{4},
-			numRanksPerComm:                []int{4},
-			source:                         exampleFileAlltoallDatatypeC,
-			binary:                         exampleBinaryAlltoallDatatypeC,
-			expectedSendCompactCountsFiles: []string{"send-counters.job0.rank0.txt"},
-			expectedRecvCompactCountsFiles: []string{"recv-counters.job0.rank0.txt"},
-			// todo: expectedCountsFiles
-			expectedLocationFiles:    []string{"alltoall_locations_comm0_rank0.md"},
-			expectedExecTimeFiles:    []string{"alltoall_execution_times.rank0_comm0_job0.md"},
-			expectedLateArrivalFiles: []string{"alltoall_late_arrival_times.rank0_comm0_job0.md"},
-			expectedBacktraceFiles:   []string{"alltoall_backtrace_rank0_trace0.md"},
-		},
-		{
-			collective:                     "alltoallv",
-			requestedValidationStepsToRun:  []int{allValidationSteps},
 			np:                             4,
 			totalNumCalls:                  1,
 			numCallsPerComm:                []int{1},
@@ -787,8 +543,6 @@ func main() {
 			listGraphsToGenerate:     []string{defaultListGraphs},
 		},
 		{
-			collective:                     "alltoallv",
-			requestedValidationStepsToRun:  []int{allValidationSteps},
 			np:                             3,
 			totalNumCalls:                  2,
 			numCallsPerComm:                []int{2},
@@ -810,8 +564,6 @@ func main() {
 			listGraphsToGenerate:     []string{defaultListGraphs},
 		},
 		{
-			collective:                     "alltoallv",
-			requestedValidationStepsToRun:  []int{allValidationSteps},
 			np:                             4,
 			totalNumCalls:                  3,
 			numCallsPerComm:                []int{2, 1},
@@ -833,8 +585,6 @@ func main() {
 			listGraphsToGenerate:     []string{defaultListGraphs},
 		},
 		{
-			collective:                     "alltoallv",
-			requestedValidationStepsToRun:  []int{allValidationSteps},
 			np:                             4,
 			totalNumCalls:                  2,
 			numCallsPerComm:                []int{2},
@@ -857,11 +607,9 @@ func main() {
 		},
 	}
 
-	if *full {
+	if fullValidation {
 		extaTests := []Test{
 			{
-				collective:                     "alltoallv",
-				requestedValidationStepsToRun:  []int{allValidationSteps},
 				np:                             4, // This test runs a large number of interations over a collective with a limited number of ranks
 				totalNumCalls:                  1000000,
 				numCallsPerComm:                []int{1000000},
@@ -886,6 +634,114 @@ func main() {
 		validationTests = append(validationTests, extaTests...)
 	}
 
+	_, filename, _, _ := runtime.Caller(0)
+	codeBaseDir := filepath.Join(filepath.Dir(filename), "..", "..", "..")
+
+	// Find MPI
+	mpiBin, err := exec.LookPath("mpirun")
+	if err != nil {
+		return nil, err
+	}
+
+	// Find make
+	makeBin, err := exec.LookPath("make")
+	if err != nil {
+		return nil, err
+	}
+
+	// Compile both the profiler libraries and the example
+	log.Println("Building libraries and tests...")
+	cmd := exec.Command(makeBin, "clean", "all")
+	cmd.Dir = filepath.Join(codeBaseDir, "src", "alltoallv")
+	err = cmd.Run()
+	if err != nil {
+		return nil, err
+	}
+
+	cmd = exec.Command(makeBin, "clean", "all")
+	cmd.Dir = filepath.Join(codeBaseDir, "examples")
+	err = cmd.Run()
+	if err != nil {
+		return nil, err
+	}
+
+	// Create a map to store the data about all the directories where
+	// results are created when the results need to be kept
+	var results map[string]*testCfg
+	if keepResults {
+		results = make(map[string]*testCfg)
+	}
+
+	for _, tt := range validationTests {
+		// Create a temporary directory where to store the results
+		tempDir, err := ioutil.TempDir("", "")
+		if err != nil {
+			return nil, err
+		}
+
+		if keepResults {
+			cfg := new(testCfg)
+			cfg.tempDir = tempDir
+			cfg.cfg = tt
+			results[tt.binary] = cfg
+		}
+
+		// Run the profiler
+		// todo: use https://github.com/gvallee/go_hpc_jobmgr so we can easily validate on local machine and clusters
+		var stdout, stderr bytes.Buffer
+		for _, lib := range sharedLibraries {
+			pathToLib := filepath.Join(codeBaseDir, "src", "alltoallv", lib)
+			fmt.Printf("Running MPI application (%s) and gathering profiles with %s...\n", tt.binary, pathToLib)
+			cmd = exec.Command(mpiBin, "-np", strconv.Itoa(tt.np), "--oversubscribe", filepath.Join(codeBaseDir, "examples", tt.binary))
+			cmd.Env = append(os.Environ(),
+				"LD_PRELOAD="+pathToLib,
+				"A2A_PROFILING_OUTPUT_DIR="+tempDir)
+			cmd.Dir = tempDir
+			cmd.Stdout = &stdout
+			cmd.Stderr = &stderr
+			err = cmd.Run()
+			if err != nil {
+				return nil, fmt.Errorf("mpirun failed.\n\tstdout: %s\n\tstderr: %s", stdout.String(), stderr.String())
+			}
+		}
+
+		// Check the results
+		err = checkOutput(codeBaseDir, tempDir, tt)
+		if err != nil {
+			return nil, err
+		}
+
+		// We clean up *only* when tests are successful and
+		// if results do not need to be kept
+		if !keepResults {
+			os.RemoveAll(tempDir)
+		}
+	}
+
+	// Return the map describing the data resulting from the tests only
+	// when the results need to be kept to later on validate postmortem
+	// analysis
+	if keepResults {
+		return results, nil
+	}
+
+	return nil, nil
+}
+
+func main() {
+	verbose := flag.Bool("v", false, "Enable verbose mode")
+	counts := flag.Bool("counts", false, "Validate the count data generated during the validation run of the profiler with an MPI application. Requires the following additional options: -dir, -job, -id.")
+	profiler := flag.Bool("profiler", false, "Perform a validation of the profiler itself running various tests. Requires MPI. Does not require any additional option.")
+	postmortem := flag.Bool("postmortem", false, "Perform a validation of the postmortem analysis tools.")
+	full := flag.Bool("full", true, "Run the full validation. WARNING! This may generate a huge amount of files and create file system issues!")
+	dir := flag.String("dir", "", "Where all the data is")
+	id := flag.Int("id", 0, "Identifier of the experiment, e.g., X from <pidX> in the profile file name")
+	jobid := flag.Int("jobid", 0, "Job ID associated to the count files")
+	help := flag.Bool("h", false, "Help message")
+	webui := flag.Bool("webui", false, "Validate the WebUI")
+
+	flag.Parse()
+
 	cmdName := filepath.Base(os.Args[0])
 	if *help {
 		fmt.Printf("%s validates various aspects of this infrastructure", cmdName)
@@ -903,7 +759,7 @@ func main() {
 		log.SetOutput(ioutil.Discard)
 	}
 
-	if !*counts && !*profilerValidation && !*postmortemValidation && !*webui {
+	if !*counts && !*profiler && !*postmortem && !*webui {
 		fmt.Println("No valid option selected, run '-h' for more details")
 		os.Exit(1)
 	}
@@ -914,47 +770,32 @@ func main() {
 	collectiveName := "alltoallv" // hardcoded for now, detection coming soon
 
 	if *webui {
-		*postmortemValidation = true
-	}
-
-	// Create a map to store the data about all the directories where
-	// results are created when the results need to be kept
-	validation := new(validationCfg)
-	validation.tests = validationTests
-	validation.sharedLibraries = sharedLibraries
-	validation.testCfgs = make(map[string]*testCfg)
-
-	for idx, tt := range validationTests {
-		cfg := new(testCfg)
-		cfg.cfg = &validationTests[idx]
-		validation.testCfgs[tt.binary] = cfg
+		*postmortem = true
 	}
 
-	validation.updateValidationStepsDependencies()
-
-	if *profilerValidation && !*postmortemValidation {
-		err := validation.profiler(false, *full)
+	if *profiler && !*postmortem {
+		_, err := validateProfiler(false, *full)
 		if err != nil {
 			fmt.Printf("Validation of the infrastructure failed: %s\n", err)
 			os.Exit(1)
 		}
 	}
 
-	if *postmortemValidation {
-		err := validation.profiler(true, *full)
-		if err != nil {
+	if *postmortem {
+		profilerValidationResults, err := validateProfiler(true, *full)
+		if err != nil || profilerValidationResults == nil {
 			fmt.Printf("Validation of the infrastructure failed: %s\n", err)
 			os.Exit(1)
 		}
 
-		err = validation.postmortemAnalysisTools(codeBaseDir, collectiveName)
+		err = validatePostmortemAnalysisTools(codeBaseDir, collectiveName, profilerValidationResults)
 		if err != nil {
 			fmt.Printf("Validation of the postmortem analysis tools failed: %s\n", err)
 			os.Exit(1)
 		}
 
 		if *webui {
-			err := validation.webUI(codeBaseDir, collectiveName)
+			err := validateWebUI(codeBaseDir, collectiveName, profilerValidationResults)
 			if err != nil {
 				fmt.Printf("Validation of the WebUI failed: %s", err)
 				os.Exit(1)
@@ -962,7 +803,7 @@ func main() {
 		}
 
 		// If successful, we can then delete all the directory that were created
-		for _, cfg := range validation.testCfgs {
+		for _, cfg := range profilerValidationResults {
 			os.RemoveAll(cfg.tempDir)
 		}
 
diff --git a/tools/internal/pkg/plot/plot.go b/tools/internal/pkg/plot/plot.go
index 8d54a038..570efa5d 100644
--- a/tools/internal/pkg/plot/plot.go
+++ b/tools/internal/pkg/plot/plot.go
@@ -216,45 +216,31 @@ func (d *plotData) generateCallsAvgs(hostname string, leadRank int, callID int)
 	for _, rank := range ranks {
 		d.sendRankBW[rank] = float64(d.sendHeatMap[rank]) / d.execTimeMap[rank]
 		d.recvRankBW[rank] = float64(d.recvHeatMap[rank]) / d.execTimeMap[rank]
-
-		// If the average is different from 0, we try to scale it and hope that the scale
-		// will match what we already have for other values. If not, we fail, we have no
-		// mechanism to put various data to the same scale at the moment.
-		// So, before starting to do some calculation, we assume the following default values
-		// which are used when the average is equal to 0:
-		// - the scaled BW is equal to non-scaled BW
-		// - the unit is the one previous detected (by default the average is assumed to be
-		// 	 equal to 0 so it does not matter)
-		// Also note that if the rank is not in the communicator, the value is set to 'NaN'
-
-		scaledSendRankBW := d.sendRankBW
-		if d.sendRankBW[rank] != 0 && !math.IsNaN(d.sendRankBW[rank]) {
-			scaledSendRankBWUnit := d.sBWUnit
-			scaledSendRankBWUnit, scaledSendRankBW, err = scale.MapFloat64s("B/s", d.sendRankBW)
-			if err != nil {
-				return err
-			}
-			if d.sBWUnit != "" && d.sBWUnit != scaledSendRankBWUnit {
-				return fmt.Errorf("detected different scales for BW send data: %s vs. %s (rank=%d, value=%f)", d.sBWUnit, scaledSendRankBWUnit, rank, d.sendRankBW[rank])
-			}
-			if d.sBWUnit == "" {
-				d.sBWUnit = scaledSendRankBWUnit
-			}
+		scaledSendRankBWUnit, scaledSendRankBW, err := scale.MapFloat64s("B/s", d.sendRankBW)
+		if err != nil {
+			return err
 		}
-
-		scaledRecvRankBW := d.recvRankBW
-		if d.recvRankBW[rank] != 0 && !math.IsNaN(d.recvRankBW[rank]) {
-			scaledRecvRankBWUnit := d.rBWUnit
-			scaledRecvRankBWUnit, scaledRecvRankBW, err = scale.MapFloat64s("B/s", d.recvRankBW)
-			if err != nil {
-				return err
-			}
-			if d.rBWUnit != "" && d.rBWUnit != scaledRecvRankBWUnit {
-				return fmt.Errorf("detected different scales for BW recv data: %s vs. %s (rank=%d, value=%f)", d.rBWUnit, scaledRecvRankBWUnit, rank, d.recvRankBW[rank])
-			}
-			if d.rBWUnit == "" {
-				d.rBWUnit = scaledRecvRankBWUnit
-			}
+		scaledRecvRankBWUnit, scaledRecvRankBW, err := scale.MapFloat64s("B/s", d.recvRankBW)
+		if err != nil {
+			return err
+		}
+		if d.sBWUnit != "" && d.sBWUnit != scaledSendRankBWUnit {
+			return fmt.Errorf("detected different scales for BW data")
+		}
+		if d.rBWUnit != "" && d.rBWUnit != scaledRecvRankBWUnit {
+			return fmt.Errorf("detected different scales for BW data")
+		}
+		if d.sBWUnit != "" && d.sBWUnit != scaledSendRankBWUnit {
+			return fmt.Errorf("detected different scales for BW data")
+		}
+		if d.rBWUnit != "" && d.rBWUnit != scaledRecvRankBWUnit {
+			return fmt.Errorf("detected different scales for BW data")
+		}
+		if d.sBWUnit == "" {
+			d.sBWUnit = scaledSendRankBWUnit
+		}
+		if d.rBWUnit == "" {
+			d.rBWUnit = scaledRecvRankBWUnit
 		}
 
 		_, d.values = getMax(d.maxValue, d.values, rank, d.sendScaledHeatMap, d.recvScaledHeatMap, d.execScaledTimeMap, d.lateArrivalScaledTimeMap, scaledSendRankBW[rank], scaledRecvRankBW[rank])
@@ -293,7 +279,6 @@ func (d *plotData) generateHostAvgs(hostname string) error {
 	for _, rank := range ranks {
 		d.sendRankBW[rank] = float64(d.avgSendHeatMap[rank]) / d.avgExecTimeMap[rank]
 		d.recvRankBW[rank] = float64(d.avgRecvHeatMap[rank]) / d.avgExecTimeMap[rank]
-
 		var scaledSendRankBWUnit string
 		var scaledRecvRankBWUnit string
 		scaledSendRankBWUnit, scaledSendBW, err := scale.Float64s("B/s", []float64{d.sendRankBW[rank]})
@@ -306,33 +291,17 @@ func (d *plotData) generateHostAvgs(hostname string) error {
 			return err
 		}
 		d.scaledRecvRankBW[rank] = scaledRecvBW[0]
-
-		// If the average is different from 0, we try to scale it and hope that the scale
-		// will match what we already have for other values. If not, we fail, we have no
-		// mechanism to put various data to the same scale at the moment.
-		// So, before starting to do some calculation, we assume the following default values
-		// which are used when the average is equal to 0:
-		// - the scaled BW is equal to non-scaled BW
-		// - the unit is the one previous detected (by default the average is assumed to be
-		// 	 equal to 0 so it does not matter)
-		// Also note that if the rank is not in the communicator, the value is set to 'NaN'
-
-		if d.sendRankBW[rank] != 0 && !math.IsNaN(d.sendRankBW[rank]) {
-			if d.sBWUnit != "" && d.sBWUnit != scaledSendRankBWUnit {
-				return fmt.Errorf("detected different scales for BW data")
-			}
-			if d.sBWUnit == "" {
-				d.sBWUnit = scaledSendRankBWUnit
-			}
+		if d.sBWUnit != "" && d.sBWUnit != scaledSendRankBWUnit {
+			return fmt.Errorf("detected different scales for BW data")
 		}
-
-		if d.recvRankBW[rank] != 0 && !math.IsNaN(d.recvRankBW[rank]) {
-			if d.rBWUnit != "" && d.rBWUnit != scaledRecvRankBWUnit {
-				return fmt.Errorf("detected different scales for BW data")
-			}
-			if d.rBWUnit == "" {
-				d.rBWUnit = scaledRecvRankBWUnit
-			}
+		if d.rBWUnit != "" && d.rBWUnit != scaledRecvRankBWUnit {
+			return fmt.Errorf("detected different scales for BW data")
+		}
+		if d.sBWUnit == "" {
+			d.sBWUnit = scaledSendRankBWUnit
+		}
+		if d.rBWUnit == "" {
+			d.rBWUnit = scaledRecvRankBWUnit
 		}
 
 		_, d.values = getMax(d.maxValue, d.values, rank, d.avgSendScaledHeatMap, d.avgRecvScaledHeatMap, d.avgExecScaledTimeMap, d.avgLateArrivalScaledTimeMap, d.sendRankBW[rank], d.recvRankBW[rank])
@@ -543,7 +512,7 @@ func write(fd *os.File, dataFiles []string, numRanks int, maxValue int, hosts []
 	if err != nil {
 		return err
 	}
-	_, err = fd.WriteString("set yrange [0:1000]\n")
+	_, err = fd.WriteString(fmt.Sprintf("set yrange [0:1000]\n"))
 	if err != nil {
 		return err
 	}
diff --git a/tools/internal/pkg/webui/templates/heatmapDetails.html b/tools/internal/pkg/webui/templates/heatmapDetails.html
new file mode 100644
index 00000000..f1475975
--- /dev/null
+++ b/tools/internal/pkg/webui/templates/heatmapDetails.html
@@ -0,0 +1,8 @@
+<p>Heatmap for pattern {{ .PatternID }}</p>
+<table id="patternDetails">
+    <tr>
+        <td>
+            <img src="/images/{{displayHeatmap .PatternID}}" width="600" height="600">
+        </td>
+    </tr>
+</table>
\ No newline at end of file
diff --git a/tools/internal/pkg/webui/templates/heatmapsLayout.html b/tools/internal/pkg/webui/templates/heatmapsLayout.html
new file mode 100644
index 00000000..9bedc244
--- /dev/null
+++ b/tools/internal/pkg/webui/templates/heatmapsLayout.html
@@ -0,0 +1,160 @@
+<!DOCTYPE html>
+<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
+<html>
+
+<head>
+    <style>
+        .body {
+            overflow: hidden;
+        }
+
+        .selected {
+            background: lightgreen;
+        }
+
+        table.patternList { /* class selectors are case-sensitive in standards mode: must match class="patternList" */
+            border-collapse: collapse;
+            width: 100%;
+        }
+
+        th.patternList,
+        td.patternList {
+            font-size: 12px;
+            text-align: left;
+            padding: 8px;
+        }
+
+        tr.patternList:nth-child(odd) {
+            background-color: Lightskyblue;
+        }
+
+        div.patternList {
+            max-width: 200px;
+            margin: auto;
+            float: left;
+            border: 1px solid black;
+            overflow-y: scroll;
+        }
+
+        table.colorSchemes { /* class selectors are case-sensitive in standards mode: must match class="colorSchemes" */
+            border-collapse: collapse;
+            width: 100%;
+        }
+
+        th.colorSchemes,
+        td.colorSchemes {
+            font-size: 12px;
+            text-align: left;
+            padding: 8px;
+        }
+
+        tr.colorSchemes:nth-child(odd) {
+            background-color: Lightskyblue;
+        }
+
+        div.colorSchemes {
+            max-width: 200px;
+            margin: auto;
+            float: left;
+            border: 1px solid black;
+            overflow-y: scroll;
+        }
+
+        div.patternDetails {
+            max-width: 600px;
+            margin: auto;
+            min-height: 800px;
+            display: inline-block;
+            position: relative;
+            margin: 0;
+            position: sticky;
+            top: 0;
+        }
+
+        td.patternDetails {
+            vertical-align: top;
+            text-align: left;
+        }
+    </style>
+</head>
+
+<body>
+    <h1>Pattern display</h1>
+    <div class="mainPanel">
+        <div class="patternList">
+            <table class="patternList" id="patternListTable">
+                <tr>
+                    <th>Pattern</th>
+                    <th>Weight</th>
+                </tr>
+                {{range $patternID, $weight := .Weights}}
+                <tr class="patternList">
+                    <td class="patternList" id="patternID">{{ $patternID }}</td>
+                    <td class="patternList" id="weight">{{ $weight }}</td>
+                </tr>
+                {{end}}
+            </table>
+        </div>
+        <div class="colorSchemes">
+            <table class="colorSchemes" id="colorSchemesTable">
+                <tr>
+                    <th>Color Scheme</th>
+                </tr>
+                <tr class="selected">
+                    <td class="colorSchemes">default</td>
+                </tr>
+                <tr class="colorSchemes">
+                    <td class="colorSchemes">linear</td>
+                </tr>
+                <tr class="colorSchemes">
+                    <td class="colorSchemes">logarithmic</td>
+                </tr>
+                <tr class="colorSchemes">
+                    <td class="colorSchemes">own</td>
+                </tr>
+            </table>
+        </div>
+    </div>
+    <div class="patternDetails">
+        <table>
+            <tr>
+                <td class="patternDetails">
+                    <div class="patternDetails">
+                        <table id="patternInfo">
+
+                        </table>
+                    </div>
+                </td>
+            </tr>
+        </table>
+    </div>
+    <script type="text/javascript">
+        // Click handler of the pattern table: selects the clicked row and loads its heatmap.
+        function highlight(e) {
+            // Only pattern cells are selectable; clicks on header cells or on the table itself are ignored
+            if (e.target.className == "patternList") {
+                // Restore previously selected element(s) of the table
+                var t = document.getElementById('patternListTable')
+                for (var i = 1, row; row = t.rows[i]; i++) {
+                    row.className = 'patternList';
+                }
+                e.target.parentNode.className = 'selected';
+                var patternID = e.target.parentNode.children[0].textContent.trim();
+                var xhr = new XMLHttpRequest();
+                xhr.onreadystatechange = function () {
+                    if (xhr.readyState == 4 && xhr.status == 200) {
+                        document.getElementById("patternInfo").innerHTML = xhr.responseText;
+                    }
+                }
+                // The pattern ID is sent as the "patternID" query parameter of /heatmap
+                xhr.open("GET", "/heatmap?patternID=" + encodeURIComponent(patternID));
+                xhr.send();
+            }
+        }
+
+        var table = document.getElementById('patternListTable');
+        table.onclick = highlight;
+    </script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/tools/internal/pkg/webui/templates/index.html b/tools/internal/pkg/webui/templates/index.html
index e1897e42..111d7fe0 100644
--- a/tools/internal/pkg/webui/templates/index.html
+++ b/tools/internal/pkg/webui/templates/index.html
@@ -51,14 +51,19 @@
         <div class="tab">
             <button class="tablinks" onclick="openTab(event, 'Calls')">Calls</button>
             <button class="tablinks" onclick="openTab(event, 'Patterns')">Patterns</button>
+            <button class="tablinks" onclick="openTab(event, 'Heatmaps')">Heatmaps</button>
         </div>
 
         <div id="Calls" class="tabcontent">
-            <object type="text/html" data="/calls" width="1600" height="600"></object>
+            <object type="text/html" data="/calls" width="1600" height="800"></object>
         </div>
 
         <div id="Patterns" class="tabcontent">
-            <object type="text/html" data="/patterns" width="1600" height="600"></object>
+            <object type="text/html" data="/patterns" width="1600" height="800"></object>
+        </div>
+
+        <div id="Heatmaps" class="tabcontent">
+            <object type="text/html" data="/heatmaps" width="1600" height="800"></object>
         </div>
     </div>
     <script>
diff --git a/tools/internal/pkg/webui/webui.go b/tools/internal/pkg/webui/webui.go
index 46517e81..49689245 100644
--- a/tools/internal/pkg/webui/webui.go
+++ b/tools/internal/pkg/webui/webui.go
@@ -48,6 +48,14 @@ type patternsSummaryData struct {
 	Content string
 }
 
+type heatmapsPageData struct {
+	Weights []int // ranged over by the heatmaps layout template: the index is shown as the pattern, the value as its weight
+}
+
+type heatmapPageData struct {
+	PatternID int // pattern whose heatmap is displayed; set from the "patternID" query parameter
+}
+
 type server struct {
 	mux              *http.ServeMux
 	cfg              *Config
@@ -55,6 +63,8 @@ type server struct {
 	callsTemplate    *template.Template
 	callTemplate     *template.Template
 	patternsTemplate *template.Template
+	heatmapsTemplate *template.Template
+	heatmapTemplate  *template.Template
 	stopTemplate     *template.Template
 }
 
@@ -97,11 +107,15 @@ type Config struct {
 	mainData callsPageData
 	cpd      callPageData
 	psd      patternsSummaryData
+	patterns heatmapsPageData
+	hpd      heatmapPageData
 
 	indexTemplatePath    string
 	callsTemplatePath    string
-	patternsTemplatePath string
 	callTemplatePath     string
+	patternsTemplatePath string
+	heatmapsTemplatePath string
+	heatmapTemplatePath  string
 	stopTemplatePath     string
 }
 
@@ -403,6 +417,39 @@ func (c *Config) servicePatternRequest(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
+func (c *Config) serviceHeatmapsRequest(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "text/html")
+	// Load the data; a failure is reported as an HTTP 500 but does not stop the handler
+	err := c.loadData()
+	if err != nil {
+		fmt.Printf("unable to load data: %s\n", err)
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+	}
+	// NOTE(review): the weights are hard-coded to four zeros; nothing is derived from the loaded data here
+	c.patterns = heatmapsPageData{
+		Weights: []int{0, 0, 0, 0},
+	}
+}
+
+func (c *Config) serviceHeatmapDetailsRequest(w http.ResponseWriter, r *http.Request) {
+	var err error
+	// Pattern 0 is used when the request has no "patternID" query parameter
+	patternID := 0
+	params := r.URL.Query()
+	for k, v := range params {
+		if k == "patternID" {
+			patternID, err = strconv.Atoi(v[0])
+			if err != nil {
+				http.Error(w, err.Error(), http.StatusInternalServerError)
+			}
+		}
+	}
+	// Store the selection in c.hpd; this also happens after a parse error has been reported
+	c.hpd = heatmapPageData{
+		PatternID: patternID,
+	}
+}
+
 // Stop cleanly terminates a running webUI
 func (c *Config) Stop() error {
 	err := c.srv.Shutdown(context.TODO())
@@ -468,6 +515,8 @@ func Init() *Config {
 	cfg.callTemplatePath = cfg.getTemplateFilePath("callDetails")
 	cfg.stopTemplatePath = cfg.getTemplateFilePath("bye")
 	cfg.patternsTemplatePath = cfg.getTemplateFilePath("patterns")
+	cfg.heatmapsTemplatePath = cfg.getTemplateFilePath("heatmapsLayout")
+	cfg.heatmapTemplatePath = cfg.getTemplateFilePath("heatmapDetails")
 	return cfg
 }
 
@@ -495,6 +544,16 @@ func (s *server) patterns(w http.ResponseWriter, r *http.Request) {
 	s.patternsTemplate.Execute(w, s.cfg.psd /*s.cfg*/)
 }
 
+func (s *server) heatmaps(w http.ResponseWriter, r *http.Request) {
+	s.cfg.serviceHeatmapsRequest(w, r)
+	s.heatmapsTemplate.Execute(w, s.cfg.patterns /*s.cfg*/)
+}
+
+func (s *server) heatmap(w http.ResponseWriter, r *http.Request) {
+	s.cfg.serviceHeatmapDetailsRequest(w, r)
+	s.heatmapTemplate.Execute(w, s.cfg.hpd)
+}
+
 func (s *server) stop(w http.ResponseWriter, r *http.Request) {
 	s.stopTemplate.Execute(w, s.cfg)
 }
@@ -508,6 +567,8 @@ func newServer(cfg *Config) *server {
 	s.mux.HandleFunc("/calls", s.calls)
 	s.mux.HandleFunc("/call", s.call)
 	s.mux.HandleFunc("/patterns", s.patterns)
+	s.mux.HandleFunc("/heatmaps", s.heatmaps)
+	s.mux.HandleFunc("/heatmap", s.heatmap)
 	s.mux.HandleFunc("/stop", s.stop)
 	s.mux.Handle("/images/", http.StripPrefix("/images", http.FileServer(http.Dir(s.cfg.DatasetDir))))
 	return s
@@ -542,6 +603,11 @@ func (c *Config) Start() error {
 		}}).ParseFiles(c.callTemplatePath))
 	s.callsTemplate = template.Must(template.ParseFiles(c.callsTemplatePath))
 	s.patternsTemplate = template.Must(template.ParseFiles(c.patternsTemplatePath))
+	s.heatmapTemplate = template.Must(template.New("heatmapDetails.html").Funcs(template.FuncMap{
+		"displayHeatmap": func(patternID int) string {
+			return fmt.Sprintf("%d_task3.png", patternID)
+		}}).ParseFiles(c.heatmapTemplatePath))
+	s.heatmapsTemplate = template.Must(template.ParseFiles(c.heatmapsTemplatePath))
 	s.stopTemplate = template.Must(template.ParseFiles(c.stopTemplatePath))
 
 	c.srv = &http.Server{