diff --git a/tools/cmd/map/map.go b/tools/cmd/map/map.go
index c1a6f6f4..94438995 100644
--- a/tools/cmd/map/map.go
+++ b/tools/cmd/map/map.go
@@ -49,7 +49,7 @@ func main() {
collectiveName := "alltoallv" // harcoded for now, detection coming soon
// We do not care about the data returned by Create, we only care here about the files that are generated.
- _, _, _, _, _, err := maps.Create(codeBaseDir, collectiveName, maps.Heat, *dir, nil)
+ _, _, _, _, _, _, err := maps.Create(codeBaseDir, collectiveName, maps.Heat, *dir, nil)
if err != nil {
fmt.Printf("ERROR: unable to create heat map: %s", err)
os.Exit(1)
diff --git a/tools/go.sum b/tools/go.sum
index ec186c18..9abed5e8 100644
--- a/tools/go.sum
+++ b/tools/go.sum
@@ -2,4 +2,10 @@ github.com/gomarkdown/markdown v0.0.0-20200609195525-3f9352745725 h1:X6sZdr+t2E2
github.com/gomarkdown/markdown v0.0.0-20200609195525-3f9352745725/go.mod h1:aii0r/K0ZnHv7G0KF7xy1v0A7s2Ljrb5byB7MO5p6TU=
github.com/gvallee/go_util v1.0.1 h1:Ch/PpAlHrHNmL2Upaxif/Nt4CqtaazDyTXh5fIhutJo=
github.com/gvallee/go_util v1.0.1/go.mod h1:fTexpwdH/n05Ziu0TXJIQsr7E+46QpBxNdeOOsyC0/s=
+github.com/lucasb-eyer/go-colorful v1.0.3 h1:QIbQXiugsb+q10B+MI+7DI1oQLdmnep86tWFlaaUAac=
+github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/mazznoer/colorgrad v0.8.1 h1:Bw/ks+KujOOg9E6YQvPqSqTLryiFnwliAH5VMZarSTI=
+github.com/mazznoer/colorgrad v0.8.1/go.mod h1:xCjvoNkXHJIAPOUMSMrXkFdxTGQqk8zMYS3e5hSLghA=
+github.com/mazznoer/csscolorparser v0.1.0 h1:xUf1uzU1r24JleIIb2Kz3bl7vATStxy53gm67yuPP+c=
+github.com/mazznoer/csscolorparser v0.1.0/go.mod h1:Aj22+L/rYN/Y6bj3bYqO3N6g1dtdHtGfQ32xZ5PJQic=
golang.org/dl v0.0.0-20190829154251-82a15e2f2ead/go.mod h1:IUMfjQLJQd4UTqG1Z90tenwKoCX93Gn3MAQJMOSBsDQ=
diff --git a/tools/internal/pkg/maps/maps.go b/tools/internal/pkg/maps/maps.go
index 6c4831ac..ad84dd3a 100644
--- a/tools/internal/pkg/maps/maps.go
+++ b/tools/internal/pkg/maps/maps.go
@@ -106,7 +106,16 @@ func saveGlobalHeatMap(codeBaseDir string, heatmap map[int]int, filepath string)
}
}
defer fd.Close()
- for key, value := range heatmap {
+
+ // sort heatmap by rank
+ var sortedKey []int
+ for k := range heatmap {
+ sortedKey = append(sortedKey, k)
+ }
+ sort.Ints(sortedKey)
+
+ for _, key := range sortedKey {
+ value := heatmap[key]
_, err := fd.WriteString(fmt.Sprintf("Rank %d: %d bytes\n", key, value))
if err != nil {
return err
@@ -176,7 +185,15 @@ func saveHostHeatMap(codeBaseDir string, heatMap map[string]int, filepath string
return err
}
- for key, value := range heatMap {
+ // sort heatMap by key
+ keys := make([]string, 0)
+ for k := range heatMap {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+
+ for _, key := range keys {
+ value := heatMap[key]
_, err := fd.WriteString(fmt.Sprintf("Host %s: %d bytes\n", key, value))
if err != nil {
return err
@@ -341,42 +358,43 @@ func createHeatMap(codeBaseDir string, collectiveName string, dir string, leadRa
return nil
}
-func commCreate(codeBaseDir string, collectiveName string, dir string, leadRank int, allCallsData map[int]*counts.CallData, globalSendHeatMap map[int]int, globalRecvHeatMap map[int]int, rankNumCallsMap map[int]int) (*location.RankFileData, CallsDataT, error) {
+func commCreate(codeBaseDir string, collectiveName string, dir string, leadRank int, allCallsData map[int]*counts.CallData, globalSendHeatMap map[int]int, globalRecvHeatMap map[int]int, rankNumCallsMap map[int]int) (*location.RankFileData, []*location.Data, CallsDataT, error) {
commMaps := CallsDataT{
SendHeatMap: map[int]map[int]int{},
RecvHeatMap: map[int]map[int]int{},
}
var rankFileData *location.RankFileData
+ var rankData []*location.Data
var err error
- rankFileData, _, commMaps.RanksMap, err = prepareRanksMap(codeBaseDir, dir)
+ rankFileData, _, commMaps.RanksMap, rankData, err = prepareRanksMap(codeBaseDir, dir)
if err != nil {
- return nil, commMaps, err
+ return nil, nil, commMaps, err
}
err = createHeatMap(codeBaseDir, collectiveName, dir, leadRank, rankFileData, allCallsData, &commMaps, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap)
if err != nil {
- return rankFileData, commMaps, err
+ return rankFileData, rankData, commMaps, err
}
// Save the heat maps for the entire execution
globalSendHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-send.md")
err = saveGlobalHeatMap(codeBaseDir, globalSendHeatMap, globalSendHeatMapFilePath)
if err != nil {
- return rankFileData, commMaps, err
+ return rankFileData, rankData, commMaps, err
}
globalRecvHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-recv.md")
err = saveGlobalHeatMap(codeBaseDir, globalRecvHeatMap, globalRecvHeatMapFilePath)
if err != nil {
- return rankFileData, commMaps, err
+ return rankFileData, rankData, commMaps, err
}
- return rankFileData, commMaps, nil
+ return rankFileData, rankData, commMaps, nil
}
// Create is the main function to create heat maps. The id identifies what type of maps
// need to be created.
-func Create(codeBaseDir string, collectiveName string, id int, dir string, allCallsData []counts.CommDataT) (map[int]*location.RankFileData, map[int]CallsDataT, map[int]int, map[int]int, map[int]int, error) {
+func Create(codeBaseDir string, collectiveName string, id int, dir string, allCallsData []counts.CommDataT) (map[int]*location.RankFileData, map[int]CallsDataT, []*location.Data, map[int]int, map[int]int, map[int]int, error) {
switch id {
case Heat:
var err error
@@ -385,13 +403,15 @@ func Create(codeBaseDir string, collectiveName string, id int, dir string, allCa
globalCallsData := make(map[int]CallsDataT)
// fixme: RankFileData is supposed to be static and dealing with ranks on comm world, no need to track per lead rank
globalCommRankFileData := make(map[int]*location.RankFileData)
+ // all calls have the same location data
+ globalCommData := make([]*location.Data, 0)
globalSendHeatMap := make(map[int]int) // The comm world rank is the key, the value amount of data sent to it
globalRecvHeatMap := make(map[int]int)
for _, commData := range allCallsData {
- globalCommRankFileData[commData.LeadRank], globalCallsData[commData.LeadRank], err = commCreate(codeBaseDir, collectiveName, dir, commData.LeadRank, commData.CallData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap)
+ globalCommRankFileData[commData.LeadRank], globalCommData, globalCallsData[commData.LeadRank], err = commCreate(codeBaseDir, collectiveName, dir, commData.LeadRank, commData.CallData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap)
if err != nil {
- return nil, nil, nil, nil, nil, err
+ return nil, nil, nil, nil, nil, nil, err
}
}
@@ -399,19 +419,19 @@ func Create(codeBaseDir string, collectiveName string, id int, dir string, allCa
globalSendHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-send.md")
err = saveGlobalHeatMap(codeBaseDir, globalSendHeatMap, globalSendHeatMapFilePath)
if err != nil {
- return nil, nil, nil, nil, nil, err
+ return nil, nil, nil, nil, nil, nil, err
}
globalRecvHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-recv.md")
err = saveGlobalHeatMap(codeBaseDir, globalRecvHeatMap, globalRecvHeatMapFilePath)
if err != nil {
- return nil, nil, nil, nil, nil, err
+ return nil, nil, nil, nil, nil, nil, err
}
- return globalCommRankFileData, globalCallsData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap, nil
+ return globalCommRankFileData, globalCallsData, globalCommData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap, nil
}
- return nil, nil, nil, nil, nil, fmt.Errorf("unknown map type: %d", id)
+ return nil, nil, nil, nil, nil, nil, fmt.Errorf("unknown map type: %d", id)
}
func saveProcessedLocationData(dir string, leadRank int, info map[int]int) error {
@@ -481,7 +501,15 @@ func createRankFile(dir string, hm *location.RankFileData) error {
return err
}
- for host, rankList := range hm.HostMap {
+ // sort hm.HostMap by key
+ keys := make([]string, 0)
+ for k := range hm.HostMap {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+
+ for _, host := range keys {
+ rankList := hm.HostMap[host]
sort.Ints(rankList)
_, err = fd.WriteString(fmt.Sprintf("Host %s - %d ranks: %s\n", host, len(rankList), notation.CompressIntArray(rankList)))
if err != nil {
@@ -492,7 +520,7 @@ func createRankFile(dir string, hm *location.RankFileData) error {
return nil
}
-func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, map[int][]*location.RankLocation, map[int]map[int]int, error) {
+func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, map[int][]*location.RankLocation, map[int]map[int]int, []*location.Data, error) {
callMap := make(map[int][]*location.RankLocation)
callsRanksMap := make(map[int]map[int]int)
// This is to track the files for a specific communicator
@@ -503,7 +531,7 @@ func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, ma
// Find all the location files
f, err := ioutil.ReadDir(dir)
if err != nil {
- return nil, nil, nil, err
+ return nil, nil, nil, nil, err
}
var locationFiles []string
for _, file := range f {
@@ -513,13 +541,15 @@ func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, ma
locationFiles = append(locationFiles, filepath.Join(dir, filename))
}
}
+ locationsDataList := make([]*location.Data, 0)
// Parse each file and aggregate the results from each file.
for _, locationFile := range locationFiles {
callsData, locationsData, err := location.ParseLocationFile(codeBaseDir, locationFile)
if err != nil {
- return nil, nil, nil, err
+ return nil, nil, nil, nil, err
}
+ locationsDataList = append(locationsDataList, locationsData)
for callID := range callsData {
if _, ok := callsRanksMap[callID]; !ok {
// Transform the array of locations into a map
@@ -547,10 +577,10 @@ func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, ma
err = createRankFile(dir, hm)
if err != nil {
- return hm, nil, nil, err
+ return hm, nil, nil, nil, err
}
- return hm, callMap, callsRanksMap, nil
+ return hm, callMap, callsRanksMap, locationsDataList, nil
}
// CreateAvgMaps uses the send and receive counts to create an average heat map of the data that is sent/received
diff --git a/tools/internal/pkg/patterns/patterns.go b/tools/internal/pkg/patterns/patterns.go
index f8fd2ea4..254a765e 100644
--- a/tools/internal/pkg/patterns/patterns.go
+++ b/tools/internal/pkg/patterns/patterns.go
@@ -14,6 +14,7 @@ import (
"os"
"path/filepath"
"reflect"
+ "sort"
"strings"
"github.com/gvallee/alltoallv_profiling/tools/internal/pkg/counts"
@@ -34,6 +35,18 @@ type CallData struct {
Calls []int
}
+// HeavyPattern describes one counts pattern together with the number of calls that used it.
+type HeavyPattern struct {
+ // Occurrence is the number of calls
+ Occurrence int
+
+ // RawCounts is the raw string representation of the counts
+ RawCounts string
+
+ // Counts are the counts for all ranks involved in the operation.
+ // The key is the rank sending/receiving the data and the value is an array of integers representing the counts for each destination/source.
+ Counts map[int][]int
+}
+
// Data holds the data all the patterns the infrastructure was able to detect
type Data struct {
// AllPatterns is the data for all the patterns that have been detected
@@ -50,6 +63,19 @@ type Data struct {
// Empty is the data of all the patterns that do not exchange any data (all counts are equal to 0)
Empty []*CallData
+
+ // HeavyPatterns is the list of patterns sorted by occurrence
+ HeavyPatterns []HeavyPattern
+}
+
+// rawCountsToKey joins the raw counts into one newline-terminated string that can serve as a map key.
+func rawCountsToKey(counts []string) string {
+	var key strings.Builder
+	for idx := range counts {
+		// Fprintln emits the entry followed by a single newline
+		fmt.Fprintln(&key, counts[idx])
+	}
+	return key.String()
+}
func CompareCallPatterns(p1 map[int]int, p2 map[int]int) bool {
@@ -189,6 +215,11 @@ func GetSummaryFilePath(basedir string, jobid int, rank int) string {
return filepath.Join(basedir, fmt.Sprintf("%sjob%d-rank%d.md", SummaryFilePrefix, jobid, rank))
}
+// GetHeavyFilePath returns the full path to the heavy patterns file stored in basedir.
+func GetHeavyFilePath(basedir string) string {
+	return filepath.Join(basedir, "heavy-patterns.md")
+}
+
func getPatterns(reader *bufio.Reader) (string, error) {
patterns := ""
@@ -496,6 +527,10 @@ func ParseFiles(sendCountsFile string, recvCountsFile string, numCalls int, rank
return nil, patterns, fmt.Errorf("counts.LoadCallsData() did not return any data")
}
+ // collect heavy patterns
+ // the key is RawCount
+ var heavyPattern = make(map[string]HeavyPattern)
+
b := progress.NewBar(numCalls, "Analyzing alltoallv calls")
defer progress.EndBar(b)
for i := 0; i < numCalls; i++ {
@@ -518,6 +553,20 @@ func ParseFiles(sendCountsFile string, recvCountsFile string, numCalls int, rank
return nil, patterns, fmt.Errorf("no recv patterns available")
}
+ // Analyze heavy pattern
+ key := rawCountsToKey(callData[i].SendData.RawCounts)
+ pattern, ok := heavyPattern[key]
+ if !ok {
+ // does not exist
+ pattern.RawCounts = key
+ for _, counts := range callData[i].SendData.Counts {
+ // use the first map, all values are the same
+ pattern.Counts = counts
+ }
+ }
+ pattern.Occurrence += 1
+ heavyPattern[key] = pattern
+
// Analyze the send/receive pattern from the call
err := patterns.addPattern(i, callData[i].SendData.Statistics.Patterns, callData[i].RecvData.Statistics.Patterns)
if err != nil {
@@ -537,6 +586,18 @@ func ParseFiles(sendCountsFile string, recvCountsFile string, numCalls int, rank
return nil, patterns, fmt.Errorf("extracted data of %d calls instead of %d", len(callData), numCalls)
}
+ // sort heavy patterns by occurrence
+ for _, val := range heavyPattern {
+ patterns.HeavyPatterns = append(patterns.HeavyPatterns, val)
+ }
+ sort.Slice(patterns.HeavyPatterns, func(i, j int) bool {
+ if patterns.HeavyPatterns[i].Occurrence != patterns.HeavyPatterns[j].Occurrence {
+ return patterns.HeavyPatterns[i].Occurrence > patterns.HeavyPatterns[j].Occurrence
+ } else {
+ return patterns.HeavyPatterns[i].RawCounts > patterns.HeavyPatterns[j].RawCounts
+ }
+ })
+
return callData, patterns, nil
}
@@ -555,6 +616,7 @@ func WriteData(patternsFd *os.File, patternsSummaryFd *os.File, patternsData Dat
num++
}
+ // patterns summary
if !NoSummary(patternsData) {
if len(patternsData.OneToN) != 0 {
_, err := patternsSummaryFd.WriteString("# 1 to N patterns\n\n")
diff --git a/tools/internal/pkg/plot/plot.go b/tools/internal/pkg/plot/plot.go
index 8d54a038..c044fb47 100644
--- a/tools/internal/pkg/plot/plot.go
+++ b/tools/internal/pkg/plot/plot.go
@@ -18,6 +18,8 @@ import (
"strconv"
"strings"
+ "github.com/gvallee/alltoallv_profiling/tools/internal/pkg/location"
+ "github.com/gvallee/alltoallv_profiling/tools/internal/pkg/patterns"
"github.com/gvallee/alltoallv_profiling/tools/internal/pkg/scale"
"github.com/gvallee/go_util/pkg/util"
)
@@ -214,6 +216,11 @@ func (d *plotData) generateCallsAvgs(hostname string, leadRank int, callID int)
}
}
for _, rank := range ranks {
+ if _, ok := d.execTimeMap[rank]; !ok {
+ // no exec time recorded for this rank: skip it to avoid a division by zero
+ continue
+ }
+
d.sendRankBW[rank] = float64(d.sendHeatMap[rank]) / d.execTimeMap[rank]
d.recvRankBW[rank] = float64(d.recvHeatMap[rank]) / d.execTimeMap[rank]
@@ -291,6 +298,11 @@ func (d *plotData) generateHostAvgs(hostname string) error {
}
}
for _, rank := range ranks {
+ if _, ok := d.avgExecTimeMap[rank]; !ok {
+ // no average exec time recorded for this rank: skip it to avoid a division by zero
+ continue
+ }
+
d.sendRankBW[rank] = float64(d.avgSendHeatMap[rank]) / d.avgExecTimeMap[rank]
d.recvRankBW[rank] = float64(d.avgRecvHeatMap[rank]) / d.avgExecTimeMap[rank]
@@ -561,17 +573,17 @@ func write(fd *os.File, dataFiles []string, numRanks int, maxValue int, hosts []
}
// Special for the first node
- str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 2:xtic(1) with points ls 1 title \"data sent (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, sendUnit))
- str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 3 with points ls 2 title \"data received (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, recvUnit))
- str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 4 with points ls 3 title \"execution time (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, execTimeUnit))
- str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 5 with points ls 4 title \"late arrival timing (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, lateArrivalTimeUnit))
- str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 6 with points ls 5 title \"bandwidth (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, sendBWUnit))
+ str += fmt.Sprintf("\"%s\" using 2:xtic(1) with points ls 1 title \"data sent (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, sendUnit)
+ str += fmt.Sprintf("\"%s\" using 3 with points ls 2 title \"data received (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, recvUnit)
+ str += fmt.Sprintf("\"%s\" using 4 with points ls 3 title \"execution time (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, execTimeUnit)
+ str += fmt.Sprintf("\"%s\" using 5 with points ls 4 title \"late arrival timing (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, lateArrivalTimeUnit)
+ str += fmt.Sprintf("\"%s\" using 6 with points ls 5 title \"bandwidth (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, sendBWUnit)
for i := 1; i < len(hosts); i++ {
- str += fmt.Sprintf("\"%s.txt\" using 2:xtic(1) with points ls 1 notitle, \\\n", dataFiles[i])
- str += fmt.Sprintf("\"%s.txt\" using 3 with points ls 2 notitle, \\\n", dataFiles[i])
- str += fmt.Sprintf("\"%s.txt\" using 4 with points ls 3 notitle, \\\n", dataFiles[i])
- str += fmt.Sprintf("\"%s.txt\" using 5 with points ls 4 notitle, \\\n", dataFiles[i])
- str += fmt.Sprintf("\"%s.txt\" using 6 with points ls 5 notitle, \\\n", dataFiles[i])
+ str += fmt.Sprintf("\"%s\" using 2:xtic(1) with points ls 1 notitle, \\\n", dataFiles[i])
+ str += fmt.Sprintf("\"%s\" using 3 with points ls 2 notitle, \\\n", dataFiles[i])
+ str += fmt.Sprintf("\"%s\" using 4 with points ls 3 notitle, \\\n", dataFiles[i])
+ str += fmt.Sprintf("\"%s\" using 5 with points ls 4 notitle, \\\n", dataFiles[i])
+ str += fmt.Sprintf("\"%s\" using 6 with points ls 5 notitle, \\\n", dataFiles[i])
}
str = strings.TrimRight(str, ", \\\n")
_, err = fd.WriteString(str)
@@ -703,3 +715,729 @@ func Avgs(dir string, outputDir string, numRanks int, hostMap map[string][]int,
return runGnuplot(gnuplotScript, outputDir)
}
+
+// heavyPatternWithLeadRank pairs a heavy pattern with the key (lead rank) of the
+// patterns map entry it was collected from.
+type heavyPatternWithLeadRank struct {
+ // leadRank is the key under which the pattern was stored in the map of patterns
+ leadRank int
+ // pattern is the heavy pattern itself
+ pattern patterns.HeavyPattern
+}
+
+// generateHeavyPatternsDataFiles writes, for each of the (at most) ten most
+// frequent heavy patterns and for each distribution of AllDists, a
+// comma-separated heat map data file and the gnuplot script that renders it.
+// All the files are created in outputDir; dir is currently not used.
+// It returns the paths of the generated gnuplot scripts.
+func generateHeavyPatternsDataFiles(dir string, outputDir string, allPatterns map[int]patterns.Data) ([]string, error) {
+	// collect patterns from different communicators
+	heavyPatterns := make([]heavyPatternWithLeadRank, 0)
+	for leadRank, data := range allPatterns {
+		for _, pattern := range data.HeavyPatterns {
+			heavyPatterns = append(heavyPatterns, heavyPatternWithLeadRank{
+				leadRank: leadRank,
+				pattern:  pattern,
+			})
+		}
+	}
+
+	// Sort by occurrence, most frequent first. The patterns come out of a map in
+	// random order and their position ends up in the file names, so ties are
+	// broken on the lead rank and the raw counts to keep the output reproducible.
+	sort.Slice(heavyPatterns, func(i, j int) bool {
+		a, b := heavyPatterns[i], heavyPatterns[j]
+		if a.pattern.Occurrence != b.pattern.Occurrence {
+			return a.pattern.Occurrence > b.pattern.Occurrence
+		}
+		if a.leadRank != b.leadRank {
+			return a.leadRank < b.leadRank
+		}
+		return a.pattern.RawCounts > b.pattern.RawCounts
+	})
+
+	// 10 most heavy patterns
+	if len(heavyPatterns) > 10 {
+		heavyPatterns = heavyPatterns[:10]
+	}
+
+	// writeFile creates (or truncates) path, writes content and closes the file
+	// right away: descriptors do not pile up until the function returns and an
+	// error reported while closing is not lost.
+	writeFile := func(path string, content string) error {
+		fd, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0755)
+		if err != nil {
+			return err
+		}
+		if _, err = fd.WriteString(content); err != nil {
+			fd.Close()
+			return err
+		}
+		return fd.Close()
+	}
+
+	gnuplotFiles := make([]string, 0)
+
+	for _, dist := range AllDists {
+		for i, hp := range heavyPatterns {
+			// find min/max value
+			maxBytes := 0
+			minBytes := math.MaxInt32
+			for _, values := range hp.pattern.Counts {
+				for _, value := range values {
+					if maxBytes < value {
+						maxBytes = value
+					}
+					if minBytes > value {
+						minBytes = value
+					}
+				}
+			}
+			if minBytes > maxBytes {
+				// the pattern has no counts at all: do not leak the MaxInt32 sentinel into the palette
+				minBytes = maxBytes
+			}
+
+			ranks := make([]int, 0, len(hp.pattern.Counts))
+			for rank := range hp.pattern.Counts {
+				ranks = append(ranks, rank)
+			}
+			sort.Ints(ranks)
+
+			// Only the first rank, the last rank and the ranks that do not directly
+			// follow their predecessor are labeled, consecutive ranks are skipped.
+			labeled := func(idx int) bool {
+				return idx == 0 || idx == len(ranks)-1 || ranks[idx] != ranks[idx-1]+1
+			}
+
+			var data strings.Builder
+
+			// xlabels
+			for idx, rank := range ranks {
+				if labeled(idx) {
+					fmt.Fprintf(&data, ",%d", rank)
+				} else {
+					data.WriteString(",")
+				}
+			}
+			data.WriteString("\n")
+
+			// heat map matrix
+			for idx, rank := range ranks {
+				if labeled(idx) {
+					fmt.Fprintf(&data, "%d", rank)
+				}
+				for _, value := range hp.pattern.Counts[rank] {
+					// convert value range to color index
+					fmt.Fprintf(&data, ",%d", dist.Map(value, maxBytes))
+				}
+				data.WriteString("\n")
+			}
+
+			// dump heat map data
+			dataFile := filepath.Join(outputDir, fmt.Sprintf("heavy_patterns_index%d_%s.txt", i, dist.Name()))
+			if err := writeFile(dataFile, data.String()); err != nil {
+				return nil, err
+			}
+
+			// dump gnuplot script
+			script := fmt.Sprintf(`set term png
+set output "heavy_patterns_index%d_%s.png"
+set title "Heat map Comm %d Top %d Occurrences %d"
+set size ratio 1
+set xlabel "Send Rank"
+set ylabel "Recv Rank"
+unset key
+set xrange [-0.5:%d.5]
+set yrange [-0.5:%d.5]
+set pointsize 2
+set datafile separator comma
+%s
+plot "heavy_patterns_index%d_%s.txt" matrix rowheaders columnheaders using 2:1:3 with image `, i, dist.Name(), hp.leadRank, i+1, hp.pattern.Occurrence, len(ranks)-1, len(ranks)-1, dist.GnuplotConfig(minBytes, maxBytes), i, dist.Name())
+			gnuplotFile := filepath.Join(outputDir, fmt.Sprintf("heavy_patterns_index%d_%s.gnuplot", i, dist.Name()))
+			if err := writeFile(gnuplotFile, script); err != nil {
+				return nil, err
+			}
+
+			gnuplotFiles = append(gnuplotFiles, gnuplotFile)
+		}
+	}
+	return gnuplotFiles, nil
+}
+
+// HeavyPatterns generates the data files and gnuplot scripts of the heavy patterns
+// found during the post-mortem analysis and runs gnuplot on each generated script.
+func HeavyPatterns(dir string, outputDir string, patterns map[int]patterns.Data) error {
+	scripts, genErr := generateHeavyPatternsDataFiles(dir, outputDir, patterns)
+	if genErr != nil {
+		return fmt.Errorf("generateHeavyPatternsDataFiles() failed: %s", genErr)
+	}
+
+	for idx := range scripts {
+		if plotErr := runGnuplot(scripts[idx], outputDir); plotErr != nil {
+			return fmt.Errorf("runGnuplot() failed: %s", plotErr)
+		}
+	}
+
+	return nil
+}
+
+// Distribution describes how byte counts are converted into the values of a heat map
+// data file and how gnuplot is configured to render these values.
+type Distribution interface {
+ // Map converts a byte count into the value written to the heat map data file; maxBytes is the largest count of the heat map
+ Map(bytes int, maxBytes int) int
+ // GnuplotConfig returns the gnuplot commands inserted into the generated script right before the plot command
+ GnuplotConfig(minBytes int, maxBytes int) string
+ // Name returns the name of the distribution; it is part of the names of the generated files
+ Name() string
+}
+
+// AllDists lists the distributions for which heat map data files and gnuplot scripts are generated.
+var AllDists = []Distribution{
+ SimpleDistribution{},
+ LinearDistribution{},
+ LogarithmDistribution{},
+ Linear2Distribution{},
+ LinearViridisDistribution{},
+ QuadraticDistribution{},
+}
+
+// generateAllPatternsDataFiles sums the heavy patterns of all the communicators
+// into a single numRanks x numRanks matrix indexed by COMM_WORLD ranks and writes,
+// for each distribution of AllDists, the heat map data file and the gnuplot script
+// that renders it. All the files are created in outputDir; dir is currently not used.
+// It returns the paths of the generated gnuplot scripts. An error is returned when
+// numRanks is not positive, when a rank exchanging data cannot be translated to a
+// COMM_WORLD rank, or when a COMM_WORLD rank does not fit in the matrix.
+func generateAllPatternsDataFiles(dir string, outputDir string, numRanks int, allPatterns map[int]patterns.Data, locationsData []*location.Data) ([]string, error) {
+	// matrix[0][0] seeds the min/max search below, so at least one rank is required
+	if numRanks <= 0 {
+		return nil, fmt.Errorf("invalid number of ranks: %d", numRanks)
+	}
+
+	// create numRanks x numRanks matrix
+	matrix := make([][]int, numRanks)
+	for i := range matrix {
+		matrix[i] = make([]int, numRanks)
+	}
+
+	// create mapping from local rank to COMM_WORLD ranks
+	mapping := make(map[int]map[int]int)
+	for _, data := range locationsData {
+		if data == nil || len(data.RankLocations) == 0 {
+			// the first location identifies the communicator, nothing can be mapped without it
+			continue
+		}
+		leadRank := data.RankLocations[0].CommWorldRank
+		mapping[leadRank] = make(map[int]int)
+		for _, loc := range data.RankLocations {
+			mapping[leadRank][loc.CommRank] = loc.CommWorldRank
+		}
+	}
+
+	// worldRank translates a communicator rank into a COMM_WORLD rank that is a
+	// valid index of the matrix. A missing entry is reported instead of silently
+	// being attributed to rank 0.
+	worldRank := func(leadRank int, commRank int) (int, error) {
+		rank, ok := mapping[leadRank][commRank]
+		if !ok {
+			return 0, fmt.Errorf("no location data for rank %d of the communicator led by rank %d", commRank, leadRank)
+		}
+		if rank < 0 || rank >= numRanks {
+			return 0, fmt.Errorf("COMM_WORLD rank %d is out of range [0, %d)", rank, numRanks)
+		}
+		return rank, nil
+	}
+
+	// sum up patterns from different communicators
+	for leadRank, data := range allPatterns {
+		for _, pattern := range data.HeavyPatterns {
+			for from, counts := range pattern.Counts {
+				for to, count := range counts {
+					if count == 0 {
+						// nothing exchanged, the matrix is not modified
+						continue
+					}
+					// convert `from` and `to` to COMM_WORLD ranks
+					worldFrom, err := worldRank(leadRank, from)
+					if err != nil {
+						return nil, err
+					}
+					worldTo, err := worldRank(leadRank, to)
+					if err != nil {
+						return nil, err
+					}
+					matrix[worldFrom][worldTo] += count * pattern.Occurrence
+				}
+			}
+		}
+	}
+
+	// find min/max value
+	maxBytes := matrix[0][0]
+	minBytes := matrix[0][0]
+	for _, row := range matrix {
+		for _, value := range row {
+			if maxBytes < value {
+				maxBytes = value
+			}
+			if minBytes > value {
+				minBytes = value
+			}
+		}
+	}
+
+	// writeFile creates (or truncates) path, writes content and closes the file
+	// right away: descriptors do not pile up until the function returns and an
+	// error reported while closing is not lost.
+	writeFile := func(path string, content string) error {
+		fd, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0755)
+		if err != nil {
+			return err
+		}
+		if _, err = fd.WriteString(content); err != nil {
+			fd.Close()
+			return err
+		}
+		return fd.Close()
+	}
+
+	// only the first and the last rank are labeled
+	labeled := func(rank int) bool {
+		return rank == 0 || rank == numRanks-1
+	}
+
+	gnuplotFiles := make([]string, 0)
+
+	for _, dist := range AllDists {
+		var data strings.Builder
+
+		// xlabels
+		for rank := 0; rank < numRanks; rank++ {
+			if labeled(rank) {
+				fmt.Fprintf(&data, ",%d", rank)
+			} else {
+				data.WriteString(",")
+			}
+		}
+		data.WriteString("\n")
+
+		// heat map matrix
+		for rank := 0; rank < numRanks; rank++ {
+			if labeled(rank) {
+				fmt.Fprintf(&data, "%d", rank)
+			}
+			for to := 0; to < numRanks; to++ {
+				// convert value range to color index
+				fmt.Fprintf(&data, ",%d", dist.Map(matrix[rank][to], maxBytes))
+			}
+			data.WriteString("\n")
+		}
+
+		// dump heat map data
+		dataFile := filepath.Join(outputDir, fmt.Sprintf("all_patterns_%s.txt", dist.Name()))
+		if err := writeFile(dataFile, data.String()); err != nil {
+			return nil, err
+		}
+
+		// dump gnuplot script
+		script := fmt.Sprintf(`set term png
+set output "all_patterns_%s.png"
+set title "Heat map of sum of all patterns"
+set size ratio 1
+set xlabel "Send Rank"
+set ylabel "Recv Rank"
+unset key
+set xrange [-0.5:%d.5]
+set yrange [-0.5:%d.5]
+set pointsize 2
+set datafile separator comma
+%s
+plot "all_patterns_%s.txt" matrix rowheaders columnheaders using 2:1:3 with image `, dist.Name(), numRanks-1, numRanks-1, dist.GnuplotConfig(minBytes, maxBytes), dist.Name())
+		gnuplotFile := filepath.Join(outputDir, fmt.Sprintf("all_patterns_%s.gnuplot", dist.Name()))
+		if err := writeFile(gnuplotFile, script); err != nil {
+			return nil, err
+		}
+
+		gnuplotFiles = append(gnuplotFiles, gnuplotFile)
+	}
+	return gnuplotFiles, nil
+}
+
+// distributions
+// SimpleDistribution maps byte counts to eight discrete colors, one per power of ten.
+type SimpleDistribution struct{}
+
+// Map returns the color index of value: 0 when value is not positive, 1 for 1 to 10,
+// 2 for 11 to 100 and so on. The index is capped at 7, the last color known to the
+// palette returned by GnuplotConfig. maxBytes is not used.
+func (d SimpleDistribution) Map(value int, maxBytes int) int {
+	// 0 'white', 1 'yellow', 2 'orange', 3 'green', 4 'red', 5 'purple', 6 'brown', 7 'black'
+	const maxColor = 7
+	if value <= 0 {
+		return 0
+	}
+	color := 1
+	for value = (value - 1) / 10; value > 0 && color < maxColor; value /= 10 {
+		color++
+	}
+	return color
+}
+
+// GnuplotConfig returns the discrete eight-color palette, its cbrange and the
+// tics labeling each color with the upper bound of its range. minBytes and
+// maxBytes are not used.
+func (d SimpleDistribution) GnuplotConfig(minBytes int, maxBytes int) string {
+	// filled contour not working
+	// offset by 0.0001
+	// https://stackoverflow.com/questions/33955878/managing-the-palette-indicators-in-gnuplot
+	return `set palette defined (0 'white', 0.0001 'white', 0.0002 'yellow', 0.9999 'yellow', 1.0001 'orange', 1.9999 'orange', 2.0001 'green', 2.9999 'green', 3.0001 'red', 3.9999 'red', 4.0001 'purple', 4.9999 'purple', 5.0001 'brown', 5.9999 'brown', 6.0001 'black', 7 'black')
+ set cbrange [0:7]
+ set palette maxcolors 8
+ set cbtics ("0" 0, "10" 1, "100" 2, "1000" 3, "10000" 4, "100000" 5, "1000000" 6, "infinity" 7)`
+}
+
+// Name returns the name of the distribution.
+func (d SimpleDistribution) Name() string {
+	return "simple"
+}
+
+// LinearDistribution keeps the byte counts unchanged and renders them on a
+// white to black gradient that starts at 0.
+type LinearDistribution struct{}
+
+// Map returns value unchanged; maxBytes is not used.
+func (d LinearDistribution) Map(value int, maxBytes int) int {
+	return value
+}
+
+// GnuplotConfig returns a palette going from white (0) to black (maxBytes) and
+// the matching cbrange; minBytes is not used.
+func (d LinearDistribution) GnuplotConfig(minBytes int, maxBytes int) string {
+	var cfg strings.Builder
+	cfg.WriteString(fmt.Sprintf("set palette defined( 0 'white', %d 'black' )\n", maxBytes))
+	cfg.WriteString(fmt.Sprintf("set cbrange [0:%d]\n", maxBytes))
+	return cfg.String()
+}
+
+// Name returns the name of the distribution.
+func (d LinearDistribution) Name() string {
+	return "linear"
+}
+
+// Linear2Distribution keeps the byte counts unchanged and renders them on a
+// white to black gradient that spans the observed minimum and maximum.
+type Linear2Distribution struct{}
+
+// Map returns value unchanged; maxBytes is not used.
+func (d Linear2Distribution) Map(value int, maxBytes int) int {
+	return value
+}
+
+// GnuplotConfig returns a palette going from white (minBytes) to black
+// (maxBytes) and the matching cbrange.
+func (d Linear2Distribution) GnuplotConfig(minBytes int, maxBytes int) string {
+	var cfg strings.Builder
+	cfg.WriteString(fmt.Sprintf("set palette defined( %d 'white', %d 'black' )\n", minBytes, maxBytes))
+	cfg.WriteString(fmt.Sprintf("set cbrange [%d:%d]\n", minBytes, maxBytes))
+	return cfg.String()
+}
+
+// Name returns the name of the distribution.
+func (d Linear2Distribution) Name() string {
+	return "linear2"
+}
+
+// LinearViridisDistribution scales the byte counts linearly for the viridis palette, see
+// https://github.com/Gnuplotting/gnuplot-palettes/blob/master/viridis.pal
+type LinearViridisDistribution struct{}
+
+// Map scales value linearly so that maxBytes is mapped to 256. It returns 0 when
+// maxBytes is 0, i.e., when the heat map does not hold any data.
+func (d LinearViridisDistribution) Map(value int, maxBytes int) int {
+	if maxBytes == 0 {
+		// all the counts are 0: avoid an integer division by zero
+		return 0
+	}
+	return value * 256 / maxBytes
+}
+
+func (d LinearViridisDistribution) GnuplotConfig(minBytes int, maxBytes int) string {
+ b := strings.Builder{}
+ fmt.Fprintf(&b, `# New matplotlib colormaps by Nathaniel J. Smith, Stefan van der Walt,
+ # and (in the case of viridis) Eric Firing.
+ #
+ # This file and the colormaps in it are released under the CC0 license /
+ # public domain dedication. We would appreciate credit if you use or
+ # redistribute these colormaps, but do not impose any legal restrictions.
+ #
+ # To the extent possible under law, the persons who associated CC0 with
+ # mpl-colormaps have waived all copyright and related or neighboring rights
+ # to mpl-colormaps.
+ #
+ # You should have received a copy of the CC0 legalcode along with this
+ # work. If not, see
Sum of All Patterns
+| Simple | +
+
+ |
+
| Linear Viridis | +
+
+ |
+
| Linear | +
+
+ |
+
| + Linear2 + | +
+
+ |
+
| + Logarithm + | +
+
+ |
+
| + Quadratic + | +
+
+ |
+