Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,14 @@ Like any PMPI option, users need to use `LD_PRELOAD` while executing their appli
On a platform where `mpirun` is directly used, the command to start the application
looks like:
```
LD_PRELOAD=$HOME<path_to_repo>/src/alltoallv/liballtoallv.so mpirun --oversubscribe -np 3 app.exe
LD_PRELOAD=$HOME<path_to_repo>/alltoallv/liballtoallv.so mpirun --oversubscribe -np 3 app.exe
```

On a platform where a job manager is used, such as Slurm, users need to update the
batch script used to submit an application run. For instance, with Open MPI and Slurm,
it would look like:
```
mpirun -np $NPROC -x LD_PRELOAD=<path_to_repo>/src/alltoallv/liballtoallv_counts.so app.exe
mpirun -np $NPROC -x LD_PRELOAD=/global/home/users/geoffroy/projects/alltoall_profiling/alltoallv/liballtoallv_counts.so app.exe
```

When using a job scheduler, users are required to correctly set the LD_PRELOAD details
Expand Down Expand Up @@ -331,4 +331,4 @@ consist of a graph providing:
- the execution time per rank,
- the arrival time per rank,
- the bandwidth per rank.
Under the graph appear the raw send and receive counters.
Under the graph appear the raw send and receive counters.
4 changes: 2 additions & 2 deletions build-scripts/build-hpcac.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#!/bin/bash

cd /global/home/users/cyrusl/placement/expt0070/alltoall_profiling
cd /global/home/users/cyrusl/placement/expt0066/alltoall_profiling

module purge
spack unload --all

HNAME=$(hostname)

#if [[ ${HNAME:0:4} == "thor" ]]; then
module load gcc/8.3.1 hpcx/2.7.0 gnuplot/5.2.8
module load gcc/8.3.1 hpcx/2.7.0
#else
# module load gcc/4.8.5 hpcx/2.7.0 # these were used for compiling on Login node for use on Jupiter before change to Centos 8
#fi
Expand Down
20 changes: 4 additions & 16 deletions examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,11 @@
# See LICENSE.txt for license information
#

all: alltoallv_c alltoallv_dt_c alltoallv_multicomms_c alltoallv_f alltoallv_bigcounts_c alltoall_demo alltoall_simple_c alltoall_bigcounts_c alltoall_multicomms_c alltoall_dt_c
all: alltoallv_c alltoallv_dt_c alltoallv_multicomms_c alltoallv_f alltoallv_bigcounts_c alltoall

alltoall_demo: alltoall_demo.c
mpicc -g alltoall_demo.c -o alltoall_demo

alltoall_simple_c: alltoall_simple_c.c
mpicc -g alltoall_simple_c.c -o alltoall_simple_c
alltoall: alltoall.c
mpicc -g alltoall.c -o alltoall

alltoall_bigcounts_c: alltoall_bigcounts_c.c
mpicc -g alltoall_bigcounts_c.c -o alltoall_bigcounts_c

alltoall_multicomms_c: alltoall_multicomms_c.c
mpicc -g alltoall_multicomms_c.c -o alltoall_multicomms_c

alltoall_dt_c: alltoall_dt_c.c
mpicc -g alltoall_dt_c.c -o alltoall_dt_c

alltoallv_c: alltoallv.c
mpicc -g alltoallv.c -o alltoallv_c

Expand All @@ -37,4 +25,4 @@ alltoallv_bigcounts_c: alltoallv_bigcounts.c
mpicc -g alltoallv_bigcounts.c -o alltoallv_bigcounts_c

clean:
@rm -f alltoallv_c alltoallv_dt_c alltoallv_f alltoallv_multicomms_c alltoallv_bigcounts_c alltoall_demo alltoall_simple_c
@rm -f alltoallv_c alltoallv_dt_c alltoallv_f alltoallv_multicomms_c alltoallv_bigcounts_c alltoall
429 changes: 429 additions & 0 deletions examples/alltoall.c

Large diffs are not rendered by default.

50 changes: 25 additions & 25 deletions src/alltoall/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,31 @@ include ../makefile_common.mk

all: liballtoall.so liballtoall_location.so liballtoall_counts.so liballtoall_late_arrival.so liballtoall_exec_timings.so liballtoall_backtrace.so

liballtoall_counts.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o mpi_alltoall.c -o liballtoall_counts.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o mpi_alltoall.c -o liballtoall_counts_unequal.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o mpi_alltoall.c -o liballtoall_counts_compact.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger_counts.o mpi_alltoall.c -o liballtoall_counts_unequal_compact.so

liballtoall_exec_timings.so: ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o mpi_alltoall.c -o liballtoall_exec_timings.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/exec_timings.o ../common/logger_exec_timings.o mpi_alltoall.c -o liballtoall_exec_timings_counts_unequal.so

liballtoall_late_arrival.so: ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o mpi_alltoall.c -o liballtoall_late_arrival.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/late_arrival_timings.o ../common/logger_late_arrival_timings.o mpi_alltoall.c -o liballtoall_late_arrival_counts_unequal.so

liballtoall_backtrace.so: ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o mpi_alltoall.c -o liballtoall_backtrace.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_backtrace.o ../common/timings.o mpi_alltoall.c -o liballtoall_backtrace_counts_unequal.so

liballtoall_location.so: ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o mpi_alltoall.c -o liballtoall_location.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_location.o ../common/timings.o mpi_alltoall.c -o liballtoall_location_counts_unequal.so

liballtoall.so: ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o mpi_alltoall.c -o liballtoall.so
mpicc -I../ -I../common/ -g -shared -fPIC -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/timings.o ../common/logger.o mpi_alltoall.c -o liballtoall_counts_unequal.so
liballtoall_counts.so: ${COMMON_OBJECTS} mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 ../common/logger_counts.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=0 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ../common/logger_counts.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts_unequal.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 ../common/logger_counts.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts_compact.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_COMPACT_FORMAT=1 -DENABLE_RAW_DATA=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ../common/logger_counts.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts_unequal_compact.so

liballtoall_exec_timings.so: ${COMMON_OBJECTS} ../common/logger_exec_timings.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 ${COMMON_OBJECTS} ../common/logger_exec_timings.o mpi_alltoall.c -o liballtoall_exec_timings.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_EXEC_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} ../common/logger_exec_timings.o mpi_alltoall.c -o liballtoall_exec_timings_counts_unequal.so

liballtoall_late_arrival.so: ${COMMON_OBJECTS} ../common/logger_late_arrival_timings.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 ../common/logger_late_arrival_timings.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_late_arrival.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LATE_ARRIVAL_TIMING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ../common/logger_late_arrival_timings.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_late_arrival_counts_unequal.so

liballtoall_backtrace.so: ${COMMON_OBJECTS} mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_backtrace.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_BACKTRACE=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_backtrace_counts_unequal.so

liballtoall_location.so: ${COMMON_OBJECTS} ../common/logger_location.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 ../common/logger_location.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_location.so
mpicc -I../ -I../common/ -g -shared -fPIC -DENABLE_LOCATION_TRACKING=1 -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ../common/logger_location.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_location_counts_unequal.so

liballtoall.so: ${COMMON_OBJECTS} ../common/logger.o mpi_alltoall.c alltoall_profiler.h
mpicc -I../ -I../common/ -g -shared -fPIC ../common/logger.o ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall.so
mpicc -I../ -I../common/ -g -shared -fPIC ../common/logger.o -DASSUME_COUNTS_EQUAL_ALL_RANKS=0 ${COMMON_OBJECTS} mpi_alltoall.c -o liballtoall_counts_unequal.so

check: all

Expand Down
5 changes: 4 additions & 1 deletion tools/Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# Copyright (c) 2020-2021 NVIDIA CORPORATION. All rights reserved.

all: webui map profile sendprofiler recvprofiler getcounters validate srcountsanalyzer analyzetimings analyzebacktraces getcalldata getbins profilerrun
all: webui map profile sendprofiler recvprofiler getcounters validate srcountsanalyzer analyzetimings analyzebacktraces getcalldata getbins profilerrun png

profilerrun:
cd cmd/profilerrun; go build profilerrun.go

webui:
cd cmd/webui; go build webui.go

png:
cd cmd/png; go build png.go

map:
cd cmd/map; go build map.go

Expand Down
245 changes: 245 additions & 0 deletions tools/cmd/png/png.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
package main

import (
"bufio"
"fmt"
"image"
"image/color"
"image/png"
"io/ioutil"
"math"
"os"
"sort"
"strconv"
"strings"
)

// max_ranks is the fixed side length of the [max_ranks][max_ranks]int count
// matrices that are built, summed and rendered in this file.
const max_ranks = 200
// max_patterns is the length of the weight array declared in main.
const max_patterns = 30
// width is the side length, in pixels, of the square drawn for one matrix cell.
const width = 10

// ranks holds the rank count of the most recently parsed pattern; it is
// written by pattern_to_sz and read by sz_to_png to size the output image.
var ranks int

// TestStringList is a list of raw pattern texts. Its Len, Less and Swap
// methods let sort.Sort order the patterns from the largest weight to the
// smallest.
type TestStringList []string

// Len returns the number of patterns in the list.
func (ts TestStringList) Len() int { return len(ts) }

// Less reports whether the pattern at index i has a strictly larger weight
// than the pattern at index j, which yields a descending order when sorting.
func (ts TestStringList) Less(i, j int) bool {
	return weight_from_pattern(ts[i]) > weight_from_pattern(ts[j])
}

// Swap exchanges the patterns stored at indices i and j.
func (ts TestStringList) Swap(i, j int) {
	held := ts[i]
	ts[i] = ts[j]
	ts[j] = held
}

// weight_from_pattern extracts the weight (call count) of a pattern from its
// raw text.
//
// The weight is the integer found between the first "Count: " marker and the
// following " calls" suffix. The function returns 0 when the marker is absent
// or when the text after it is not a number, so malformed input no longer
// causes an index-out-of-range panic.
func weight_from_pattern(str string) int {
	sections := strings.Split(str, "Count: ")
	if len(sections) < 2 {
		// No "Count: " marker: this text does not describe a pattern.
		return 0
	}
	// Split always returns at least one element, so index 0 is safe here.
	number := strings.Split(sections[1], " calls")[0]
	// The signature has no error channel; Atoi already yields 0 for text
	// that is not a number, which is the documented fallback.
	res, _ := strconv.Atoi(strings.TrimSpace(number))
	return res
}

// pattern_to_sz converts the raw text of one pattern into a matrix of counts
// indexed as res[source rank][position in the count list].
//
// The text is expected to contain "Number of ranks: N" followed by entries of
// the form "Rank(s) <list>: <c0> <c1> ...", where <list> is a comma-separated
// mix of single ranks ("3") and inclusive ranges ("0-7"). Every count of an
// entry is added to the row of each rank named in its list.
//
// Side effect: the package-level ranks variable is set to N, capped at
// max_ranks. When the "Number of ranks: " marker is missing, an all-zero
// matrix is returned and ranks is left untouched.
//
// Malformed input is tolerated instead of panicking: entries without ":" and
// rank specifications that are not numbers are skipped, ranks outside
// [0, N) are ignored, and counts beyond max_ranks columns are dropped.
func pattern_to_sz(str string) [max_ranks][max_ranks]int {
	var res [max_ranks][max_ranks]int

	sections := strings.Split(str, "Number of ranks: ")
	if len(sections) < 2 {
		return res
	}
	firstLine := strings.Split(sections[1], "\n")[0]
	totalRank, err := strconv.Atoi(strings.TrimSpace(firstLine))
	if err != nil || totalRank < 0 {
		totalRank = 0
	}
	if totalRank > max_ranks {
		// The matrices are fixed-size arrays; anything larger cannot be stored.
		fmt.Printf("Warning: pattern declares %d ranks, only the first %d are used\n", totalRank, max_ranks)
		totalRank = max_ranks
	}
	ranks = totalRank

	// The text before the first "Rank(s) " is the pattern header, not an entry.
	for _, item := range strings.Split(str, "Rank(s) ")[1:] {
		part := strings.Split(item, ":")
		if len(part) < 2 {
			continue
		}

		// from[r] is true when rank r is named by this entry's rank list.
		var from [max_ranks]bool
		for _, spec := range strings.Split(part[0], ",") {
			bounds := strings.Split(spec, "-")
			first, errFirst := strconv.Atoi(strings.TrimSpace(bounds[0]))
			last, errLast := first, errFirst
			if len(bounds) > 1 {
				last, errLast = strconv.Atoi(strings.TrimSpace(bounds[1]))
			}
			if errFirst != nil || errLast != nil {
				continue
			}
			if first < 0 {
				first = 0
			}
			if last >= totalRank {
				last = totalRank - 1
			}
			for r := first; r <= last; r++ {
				from[r] = true
			}
		}

		// Parse the counts once per entry rather than once per source rank.
		fields := strings.Fields(part[1])
		if len(fields) > max_ranks {
			fields = fields[:max_ranks]
		}
		counts := make([]int, len(fields))
		for j, field := range fields {
			// Tokens that are not numbers contribute 0, as Atoi returns 0 for them.
			counts[j], _ = strconv.Atoi(field)
		}

		for i := 0; i < totalRank; i++ {
			if !from[i] {
				continue
			}
			for j, count := range counts {
				res[i][j] += count
			}
		}
	}
	return res
}

// num_to_rgb_8color maps a data amount onto one of eight fixed colours, one
// per decade (used for the "task3" images written by main).
//
// Exactly 0 is white; every other value takes the colour of the first upper
// bound it does not exceed, and anything above 1000000 is black.
func num_to_rgb_8color(num int) [3]uint8 {
	switch {
	case num == 0:
		return [3]uint8{255, 255, 255} // white
	case num <= 10:
		return [3]uint8{255, 255, 0} // yellow
	case num <= 100:
		return [3]uint8{255, 165, 0} // orange
	case num <= 1000:
		return [3]uint8{0, 255, 0} // green
	case num <= 10000:
		return [3]uint8{255, 0, 0} // red
	case num <= 100000:
		return [3]uint8{160, 32, 240} // purple
	case num <= 1000000:
		return [3]uint8{165, 42, 42} // brown
	default:
		return [3]uint8{0, 0, 0} // black
	}
}

// num_to_low returns the largest power of ten that is not greater than num.
// For num < 1 it returns 1.
//
// The computation uses integer arithmetic only. Truncating math.Log10 can
// under-report exact powers of ten because of floating-point rounding, and
// for num <= 0 it converts -Inf or NaN to int, whose result is
// implementation-dependent.
func num_to_low(num int) int {
	res := 1
	// Comparing against num/10 (instead of res*10 <= num) cannot overflow.
	for res <= num/10 {
		res *= 10
	}
	return res
}

// num_to_rgb_linear maps a data amount onto a grey level with a linear scale
// (first "task5" method): every 4000 units darken the pixel by one step,
// starting from white (255) and saturating at black (0).
func num_to_rgb_linear(num int) [3]uint8 {
	steps := num / 4000
	if steps > 255 {
		steps = 255
	}
	grey := uint8(255 - steps)
	return [3]uint8{grey, grey, grey}
}

// num_to_rgb_Logarithmic maps a data amount onto a grey level with a
// logarithmic scale (second "task5" method): log10(num+1) is multiplied by
// 42.66, capped at 255 and subtracted from white, so larger amounts are darker.
func num_to_rgb_Logarithmic(num int) [3]uint8 {
	darkness := math.Log10(float64(num+1)) * 42.66
	if darkness > 255 {
		darkness = 255
	}
	grey := uint8(255 - darkness)
	return [3]uint8{grey, grey, grey}
}

// num_to_rgb_own maps a data amount onto a colour by blending two entries of
// the 8-colour palette (third "task5" method).
//
// The lower bound is the power of ten returned by num_to_low (replaced by 0
// when that power is 1) and the upper bound is ten times that power. Each
// channel is the linear interpolation between the palette colours of the two
// bounds, weighted by where num lies between them.
func num_to_rgb_own(num int) [3]uint8 {
	lower := num_to_low(num)
	upper := lower * 10
	if lower == 1 {
		// The first band starts at zero rather than at one.
		lower = 0
	}

	lowerColor := num_to_rgb_8color(lower)
	upperColor := num_to_rgb_8color(upper)

	lowerShare := float64(upper - num)
	upperShare := float64(num - lower)
	span := float64(upper - lower)

	var blended [3]uint8
	for ch := range blended {
		blended[ch] = uint8((float64(lowerColor[ch])*lowerShare + float64(upperColor[ch])*upperShare) / span)
	}
	return blended
}

// sz_to_png renders a count matrix as a PNG image written to path.
//
// The image is (ranks*width) pixels on each side; the cell sz[i][j] is drawn
// as a width-by-width square whose x range is derived from i and whose y
// range is derived from j. mode selects the colour mapping:
//
//	0: num_to_rgb_8color
//	1: num_to_rgb_linear
//	2: num_to_rgb_Logarithmic
//	3: num_to_rgb_own
//
// Any other mode leaves every cell black. Errors are printed to standard
// output; when the file cannot be created nothing else is attempted.
func sz_to_png(sz [max_ranks][max_ranks]int, path string, mode int) {
	file, err := os.Create(path)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer func() {
		// A failed Close can mean the image was not fully written.
		if cerr := file.Close(); cerr != nil {
			fmt.Println(cerr)
		}
	}()

	side := ranks * width
	rgba := image.NewRGBA(image.Rect(0, 0, side, side))

	// Never index past the fixed-size matrix, whatever ranks claims.
	cells := ranks
	if cells > max_ranks {
		cells = max_ranks
	}

	for i := 0; i < cells; i++ {
		for j := 0; j < cells; j++ {
			// The colour only depends on the cell, so compute it once
			// instead of once per pixel.
			var rgb [3]uint8
			switch mode {
			case 0:
				rgb = num_to_rgb_8color(sz[i][j])
			case 1:
				rgb = num_to_rgb_linear(sz[i][j])
			case 2:
				rgb = num_to_rgb_Logarithmic(sz[i][j])
			case 3:
				rgb = num_to_rgb_own(sz[i][j])
			}
			fill := color.RGBA{R: rgb[0], G: rgb[1], B: rgb[2], A: 255}

			for x := i * width; x < (i+1)*width; x++ {
				for y := j * width; y < (j+1)*width; y++ {
					rgba.Set(x, y, fill)
				}
			}
		}
	}

	if err := png.Encode(file, rgba); err != nil {
		fmt.Println(err)
	}
}
// main reads the pattern file named by the first command-line argument and
// writes its results next to that file:
//
//   - weight.txt: one "<index> <weight>" line per pattern, heaviest first;
//   - <i>_task3.png, <i>_task5_linear.png, <i>_task5_log.png and
//     <i>_task5_own.png for each of the first 11 patterns;
//   - task4.png, task4_linear.png, task4_log.png and task4_own.png for the
//     weighted sum of those patterns.
//
// The input path must contain a "/" so that the output directory can be
// derived from it. The first byte of the file is dropped and the rest is
// split on "#" to obtain the individual patterns.
func main() {
	if len(os.Args) <= 1 {
		fmt.Println("No input file!")
		return
	}
	filein := os.Args[1]
	lastpiepos := strings.LastIndex(filein, "/")
	if lastpiepos == -1 {
		fmt.Println("Can't guess path of input file")
		os.Exit(0)
	}
	outDir := filein[:lastpiepos]

	data, err := ioutil.ReadFile(filein) // path of the input file
	if err != nil {
		fmt.Println("File reading error", err)
		return
	}
	if len(data) == 0 {
		// Slicing off the first byte below would panic on an empty file.
		fmt.Println("File reading error: input file is empty")
		return
	}

	// Cut the text into patterns and sort them by descending weight.
	pattern := strings.Split(string(data)[1:], "#")
	sort.Sort(TestStringList(pattern))

	// Write the weight of every pattern to weight.txt. O_TRUNC discards the
	// content of an earlier, possibly longer, weight.txt.
	outputFile, outputError := os.OpenFile(outDir+"/weight.txt", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if outputError != nil {
		fmt.Println(outputError)
		return
	}
	defer outputFile.Close()
	outputWriter := bufio.NewWriter(outputFile)
	var weight [max_patterns]int
	for i := 0; i < len(pattern); i++ {
		w := weight_from_pattern(pattern[i])
		// Every weight is listed in the file, but only the first
		// max_patterns fit in the array used for the weighted sum.
		if i < max_patterns {
			weight[i] = w
		}
		// Write errors are sticky in bufio.Writer and surface at Flush.
		outputWriter.WriteString(strconv.Itoa(i) + " " + strconv.Itoa(w) + "\n")
	}
	if err := outputWriter.Flush(); err != nil {
		fmt.Println(err)
	}

	// Accumulate the weighted sum of the count matrices and render each
	// individual pattern along the way. Only indices 0 through 10 are used.
	var allsz [max_ranks][max_ranks]int
	for i := 0; i < len(pattern) && i <= 10; i++ {
		sz := pattern_to_sz(pattern[i])
		for x := 0; x < max_ranks; x++ {
			for y := 0; y < max_ranks; y++ {
				allsz[x][y] += sz[x][y] * weight[i]
			}
		}

		prefix := outDir + "/" + strconv.Itoa(i)
		sz_to_png(sz, prefix+"_task3.png", 0)
		sz_to_png(sz, prefix+"_task5_linear.png", 1)
		sz_to_png(sz, prefix+"_task5_log.png", 2)
		sz_to_png(sz, prefix+"_task5_own.png", 3)
	}

	sz_to_png(allsz, outDir+"/task4.png", 0)
	sz_to_png(allsz, outDir+"/task4_linear.png", 1)
	sz_to_png(allsz, outDir+"/task4_log.png", 2)
	sz_to_png(allsz, outDir+"/task4_own.png", 3)
}
Loading