diff --git a/tools/cmd/map/map.go b/tools/cmd/map/map.go index c1a6f6f4..94438995 100644 --- a/tools/cmd/map/map.go +++ b/tools/cmd/map/map.go @@ -49,7 +49,7 @@ func main() { collectiveName := "alltoallv" // harcoded for now, detection coming soon // We do not care about the data returned by Create, we only care here about the files that are generated. - _, _, _, _, _, err := maps.Create(codeBaseDir, collectiveName, maps.Heat, *dir, nil) + _, _, _, _, _, _, err := maps.Create(codeBaseDir, collectiveName, maps.Heat, *dir, nil) if err != nil { fmt.Printf("ERROR: unable to create heat map: %s", err) os.Exit(1) diff --git a/tools/go.sum b/tools/go.sum index ec186c18..9abed5e8 100644 --- a/tools/go.sum +++ b/tools/go.sum @@ -2,4 +2,10 @@ github.com/gomarkdown/markdown v0.0.0-20200609195525-3f9352745725 h1:X6sZdr+t2E2 github.com/gomarkdown/markdown v0.0.0-20200609195525-3f9352745725/go.mod h1:aii0r/K0ZnHv7G0KF7xy1v0A7s2Ljrb5byB7MO5p6TU= github.com/gvallee/go_util v1.0.1 h1:Ch/PpAlHrHNmL2Upaxif/Nt4CqtaazDyTXh5fIhutJo= github.com/gvallee/go_util v1.0.1/go.mod h1:fTexpwdH/n05Ziu0TXJIQsr7E+46QpBxNdeOOsyC0/s= +github.com/lucasb-eyer/go-colorful v1.0.3 h1:QIbQXiugsb+q10B+MI+7DI1oQLdmnep86tWFlaaUAac= +github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/mazznoer/colorgrad v0.8.1 h1:Bw/ks+KujOOg9E6YQvPqSqTLryiFnwliAH5VMZarSTI= +github.com/mazznoer/colorgrad v0.8.1/go.mod h1:xCjvoNkXHJIAPOUMSMrXkFdxTGQqk8zMYS3e5hSLghA= +github.com/mazznoer/csscolorparser v0.1.0 h1:xUf1uzU1r24JleIIb2Kz3bl7vATStxy53gm67yuPP+c= +github.com/mazznoer/csscolorparser v0.1.0/go.mod h1:Aj22+L/rYN/Y6bj3bYqO3N6g1dtdHtGfQ32xZ5PJQic= golang.org/dl v0.0.0-20190829154251-82a15e2f2ead/go.mod h1:IUMfjQLJQd4UTqG1Z90tenwKoCX93Gn3MAQJMOSBsDQ= diff --git a/tools/internal/pkg/maps/maps.go b/tools/internal/pkg/maps/maps.go index 6c4831ac..ad84dd3a 100644 --- a/tools/internal/pkg/maps/maps.go +++ b/tools/internal/pkg/maps/maps.go @@ -106,7 +106,16 @@ func 
saveGlobalHeatMap(codeBaseDir string, heatmap map[int]int, filepath string) } } defer fd.Close() - for key, value := range heatmap { + + // sort heatmap by rank + var sortedKey []int + for k := range heatmap { + sortedKey = append(sortedKey, k) + } + sort.Ints(sortedKey) + + for _, key := range sortedKey { + value := heatmap[key] _, err := fd.WriteString(fmt.Sprintf("Rank %d: %d bytes\n", key, value)) if err != nil { return err @@ -176,7 +185,15 @@ func saveHostHeatMap(codeBaseDir string, heatMap map[string]int, filepath string return err } - for key, value := range heatMap { + // sort heatMap by key + keys := make([]string, 0) + for k := range heatMap { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, key := range keys { + value := heatMap[key] _, err := fd.WriteString(fmt.Sprintf("Host %s: %d bytes\n", key, value)) if err != nil { return err @@ -341,42 +358,43 @@ func createHeatMap(codeBaseDir string, collectiveName string, dir string, leadRa return nil } -func commCreate(codeBaseDir string, collectiveName string, dir string, leadRank int, allCallsData map[int]*counts.CallData, globalSendHeatMap map[int]int, globalRecvHeatMap map[int]int, rankNumCallsMap map[int]int) (*location.RankFileData, CallsDataT, error) { +func commCreate(codeBaseDir string, collectiveName string, dir string, leadRank int, allCallsData map[int]*counts.CallData, globalSendHeatMap map[int]int, globalRecvHeatMap map[int]int, rankNumCallsMap map[int]int) (*location.RankFileData, []*location.Data, CallsDataT, error) { commMaps := CallsDataT{ SendHeatMap: map[int]map[int]int{}, RecvHeatMap: map[int]map[int]int{}, } var rankFileData *location.RankFileData + var rankData []*location.Data var err error - rankFileData, _, commMaps.RanksMap, err = prepareRanksMap(codeBaseDir, dir) + rankFileData, _, commMaps.RanksMap, rankData, err = prepareRanksMap(codeBaseDir, dir) if err != nil { - return nil, commMaps, err + return nil, nil, commMaps, err } err = createHeatMap(codeBaseDir, 
collectiveName, dir, leadRank, rankFileData, allCallsData, &commMaps, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap) if err != nil { - return rankFileData, commMaps, err + return rankFileData, rankData, commMaps, err } // Save the heat maps for the entire execution globalSendHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-send.md") err = saveGlobalHeatMap(codeBaseDir, globalSendHeatMap, globalSendHeatMapFilePath) if err != nil { - return rankFileData, commMaps, err + return rankFileData, rankData, commMaps, err } globalRecvHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-recv.md") err = saveGlobalHeatMap(codeBaseDir, globalRecvHeatMap, globalRecvHeatMapFilePath) if err != nil { - return rankFileData, commMaps, err + return rankFileData, rankData, commMaps, err } - return rankFileData, commMaps, nil + return rankFileData, rankData, commMaps, nil } // Create is the main function to create heat maps. The id identifies what type of maps // need to be created. -func Create(codeBaseDir string, collectiveName string, id int, dir string, allCallsData []counts.CommDataT) (map[int]*location.RankFileData, map[int]CallsDataT, map[int]int, map[int]int, map[int]int, error) { +func Create(codeBaseDir string, collectiveName string, id int, dir string, allCallsData []counts.CommDataT) (map[int]*location.RankFileData, map[int]CallsDataT, []*location.Data, map[int]int, map[int]int, map[int]int, error) { switch id { case Heat: var err error @@ -385,13 +403,15 @@ func Create(codeBaseDir string, collectiveName string, id int, dir string, allCa globalCallsData := make(map[int]CallsDataT) // fixme: RankFileData is supposed to be static and dealing with ranks on comm world, no need to track per lead rank globalCommRankFileData := make(map[int]*location.RankFileData) + // all calls have the same location data + globalCommData := make([]*location.Data, 0) globalSendHeatMap := make(map[int]int) // The comm world rank is the key, the value amount of data sent to 
it globalRecvHeatMap := make(map[int]int) for _, commData := range allCallsData { - globalCommRankFileData[commData.LeadRank], globalCallsData[commData.LeadRank], err = commCreate(codeBaseDir, collectiveName, dir, commData.LeadRank, commData.CallData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap) + globalCommRankFileData[commData.LeadRank], globalCommData, globalCallsData[commData.LeadRank], err = commCreate(codeBaseDir, collectiveName, dir, commData.LeadRank, commData.CallData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap) if err != nil { - return nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, err } } @@ -399,19 +419,19 @@ func Create(codeBaseDir string, collectiveName string, id int, dir string, allCa globalSendHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-send.md") err = saveGlobalHeatMap(codeBaseDir, globalSendHeatMap, globalSendHeatMapFilePath) if err != nil { - return nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, err } globalRecvHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-recv.md") err = saveGlobalHeatMap(codeBaseDir, globalRecvHeatMap, globalRecvHeatMapFilePath) if err != nil { - return nil, nil, nil, nil, nil, err + return nil, nil, nil, nil, nil, nil, err } - return globalCommRankFileData, globalCallsData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap, nil + return globalCommRankFileData, globalCallsData, globalCommData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap, nil } - return nil, nil, nil, nil, nil, fmt.Errorf("unknown map type: %d", id) + return nil, nil, nil, nil, nil, nil, fmt.Errorf("unknown map type: %d", id) } func saveProcessedLocationData(dir string, leadRank int, info map[int]int) error { @@ -481,7 +501,15 @@ func createRankFile(dir string, hm *location.RankFileData) error { return err } - for host, rankList := range hm.HostMap { + // sort hm.HostMap by key + keys := make([]string, 0) + for k := range hm.HostMap { + keys = append(keys, 
k) + } + sort.Strings(keys) + + for _, host := range keys { + rankList := hm.HostMap[host] sort.Ints(rankList) _, err = fd.WriteString(fmt.Sprintf("Host %s - %d ranks: %s\n", host, len(rankList), notation.CompressIntArray(rankList))) if err != nil { @@ -492,7 +520,7 @@ func createRankFile(dir string, hm *location.RankFileData) error { return nil } -func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, map[int][]*location.RankLocation, map[int]map[int]int, error) { +func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, map[int][]*location.RankLocation, map[int]map[int]int, []*location.Data, error) { callMap := make(map[int][]*location.RankLocation) callsRanksMap := make(map[int]map[int]int) // This is to track the files for a specific communicator @@ -503,7 +531,7 @@ func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, ma // Find all the location files f, err := ioutil.ReadDir(dir) if err != nil { - return nil, nil, nil, err + return nil, nil, nil, nil, err } var locationFiles []string for _, file := range f { @@ -513,13 +541,15 @@ func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, ma locationFiles = append(locationFiles, filepath.Join(dir, filename)) } } + locationsDataList := make([]*location.Data, 0) // Parse each file and aggregate the results from each file. 
// HeavyPattern describes one distinct set of counts together with the number
// of calls in which it was observed.
type HeavyPattern struct {
	// Occurrence is the number of calls
	Occurrence int

	// RawCounts is the raw string representation of the counts
	RawCounts string

	// Counts are the counts for all ranks involved in the operation
	// The key is the rank sending/receiving the data and the value an array of integers representing counts for each destination/source
	Counts map[int][]int
}
// GetHeavyFilePath returns the full path to the heavy pattern file, i.e. the
// file named "heavy-patterns.md" located in basedir.
func GetHeavyFilePath(basedir string) string {
	// The file name is a constant: no formatting is required.
	return filepath.Join(basedir, "heavy-patterns.md")
}
int, rank return nil, patterns, fmt.Errorf("extracted data of %d calls instead of %d", len(callData), numCalls) } + // sort heavy patterns by occurrence + for _, val := range heavyPattern { + patterns.HeavyPatterns = append(patterns.HeavyPatterns, val) + } + sort.Slice(patterns.HeavyPatterns, func(i, j int) bool { + if patterns.HeavyPatterns[i].Occurrence != patterns.HeavyPatterns[j].Occurrence { + return patterns.HeavyPatterns[i].Occurrence > patterns.HeavyPatterns[j].Occurrence + } else { + return patterns.HeavyPatterns[i].RawCounts > patterns.HeavyPatterns[j].RawCounts + } + }) + return callData, patterns, nil } @@ -555,6 +616,7 @@ func WriteData(patternsFd *os.File, patternsSummaryFd *os.File, patternsData Dat num++ } + // patterns summary if !NoSummary(patternsData) { if len(patternsData.OneToN) != 0 { _, err := patternsSummaryFd.WriteString("# 1 to N patterns\n\n") diff --git a/tools/internal/pkg/plot/plot.go b/tools/internal/pkg/plot/plot.go index 8d54a038..c044fb47 100644 --- a/tools/internal/pkg/plot/plot.go +++ b/tools/internal/pkg/plot/plot.go @@ -18,6 +18,8 @@ import ( "strconv" "strings" + "github.com/gvallee/alltoallv_profiling/tools/internal/pkg/location" + "github.com/gvallee/alltoallv_profiling/tools/internal/pkg/patterns" "github.com/gvallee/alltoallv_profiling/tools/internal/pkg/scale" "github.com/gvallee/go_util/pkg/util" ) @@ -214,6 +216,11 @@ func (d *plotData) generateCallsAvgs(hostname string, leadRank int, callID int) } } for _, rank := range ranks { + if _, ok := d.execTimeMap[rank]; !ok { + // exec time not found, avoid division with zero + continue + } + d.sendRankBW[rank] = float64(d.sendHeatMap[rank]) / d.execTimeMap[rank] d.recvRankBW[rank] = float64(d.recvHeatMap[rank]) / d.execTimeMap[rank] @@ -291,6 +298,11 @@ func (d *plotData) generateHostAvgs(hostname string) error { } } for _, rank := range ranks { + if _, ok := d.avgExecTimeMap[rank]; !ok { + // exec time not found, avoid division with zero + continue + } + d.sendRankBW[rank] 
= float64(d.avgSendHeatMap[rank]) / d.avgExecTimeMap[rank] d.recvRankBW[rank] = float64(d.avgRecvHeatMap[rank]) / d.avgExecTimeMap[rank] @@ -561,17 +573,17 @@ func write(fd *os.File, dataFiles []string, numRanks int, maxValue int, hosts [] } // Special for the first node - str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 2:xtic(1) with points ls 1 title \"data sent (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, sendUnit)) - str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 3 with points ls 2 title \"data received (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, recvUnit)) - str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 4 with points ls 3 title \"execution time (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, execTimeUnit)) - str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 5 with points ls 4 title \"late arrival timing (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, lateArrivalTimeUnit)) - str += fmt.Sprintf(fmt.Sprintf("\"%s.txt\" using 6 with points ls 5 title \"bandwidth (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, sendBWUnit)) + str += fmt.Sprintf("\"%s\" using 2:xtic(1) with points ls 1 title \"data sent (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, sendUnit) + str += fmt.Sprintf("\"%s\" using 3 with points ls 2 title \"data received (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, recvUnit) + str += fmt.Sprintf("\"%s\" using 4 with points ls 3 title \"execution time (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, execTimeUnit) + str += fmt.Sprintf("\"%s\" using 5 with points ls 4 title 
\"late arrival timing (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, lateArrivalTimeUnit) + str += fmt.Sprintf("\"%s\" using 6 with points ls 5 title \"bandwidth (%s)\", \\\n", dataFiles[0] /*filepath.Base(getPlotDataFilePath(outputDir, leadRank, callID, hosts[0]))*/, sendBWUnit) for i := 1; i < len(hosts); i++ { - str += fmt.Sprintf("\"%s.txt\" using 2:xtic(1) with points ls 1 notitle, \\\n", dataFiles[i]) - str += fmt.Sprintf("\"%s.txt\" using 3 with points ls 2 notitle, \\\n", dataFiles[i]) - str += fmt.Sprintf("\"%s.txt\" using 4 with points ls 3 notitle, \\\n", dataFiles[i]) - str += fmt.Sprintf("\"%s.txt\" using 5 with points ls 4 notitle, \\\n", dataFiles[i]) - str += fmt.Sprintf("\"%s.txt\" using 6 with points ls 5 notitle, \\\n", dataFiles[i]) + str += fmt.Sprintf("\"%s\" using 2:xtic(1) with points ls 1 notitle, \\\n", dataFiles[i]) + str += fmt.Sprintf("\"%s\" using 3 with points ls 2 notitle, \\\n", dataFiles[i]) + str += fmt.Sprintf("\"%s\" using 4 with points ls 3 notitle, \\\n", dataFiles[i]) + str += fmt.Sprintf("\"%s\" using 5 with points ls 4 notitle, \\\n", dataFiles[i]) + str += fmt.Sprintf("\"%s\" using 6 with points ls 5 notitle, \\\n", dataFiles[i]) } str = strings.TrimRight(str, ", \\\n") _, err = fd.WriteString(str) @@ -703,3 +715,729 @@ func Avgs(dir string, outputDir string, numRanks int, hostMap map[string][]int, return runGnuplot(gnuplotScript, outputDir) } + +type heavyPatternWithLeadRank struct { + leadRank int + pattern patterns.HeavyPattern +} + +func generateHeavyPatternsDataFiles(dir string, outputDir string, allPatterns map[int]patterns.Data) ([]string, error) { + // collect patterns from different communicators + heavyPatterns := make([]heavyPatternWithLeadRank, 0) + for leadRank, data := range allPatterns { + for _, pattern := range data.HeavyPatterns { + heavyPatterns = append(heavyPatterns, heavyPatternWithLeadRank{ + leadRank: leadRank, + pattern: pattern, + }) + } 
+ } + + // sort by occurrence + sort.Slice(heavyPatterns, func(i, j int) bool { + return heavyPatterns[i].pattern.Occurrence > heavyPatterns[j].pattern.Occurrence + }) + + // 10 most heavy patterns + if len(heavyPatterns) > 10 { + heavyPatterns = heavyPatterns[:10] + } + + gnuplotFiles := make([]string, 0) + + for _, dist := range AllDists { + for i, heavyPattern := range heavyPatterns { + // find min/max value + maxBytes := 0 + minBytes := math.MaxInt32 + for _, ranks := range heavyPattern.pattern.Counts { + for _, value := range ranks { + if maxBytes < value { + maxBytes = value + } + if minBytes > value { + minBytes = value + } + } + } + + // dump heat map data + dataFile := filepath.Join(outputDir, fmt.Sprintf("heavy_patterns_index%d_%s.txt", i, dist.Name())) + fd, err := os.OpenFile(dataFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0755) + if err != nil { + return nil, err + } + defer fd.Close() + + ranks := make([]int, 0) + for rank := range heavyPattern.pattern.Counts { + ranks = append(ranks, rank) + } + sort.Ints(ranks) + + // xlabels + var labels strings.Builder + lastRank := 0 + for _, rank := range ranks { + // skip consecutive ranks + if rank != lastRank+1 || rank == ranks[len(ranks)-1] { + fmt.Fprintf(&labels, ",%d", rank) + } else { + fmt.Fprintf(&labels, ",") + } + lastRank = rank + } + _, err = fd.WriteString(fmt.Sprintf("%s\n", labels.String())) + if err != nil { + return nil, err + } + + // heat map matrix + lastRank = 0 + for _, rank := range ranks { + var row strings.Builder + // skip consecutive ranks + if rank != lastRank+1 || rank == ranks[len(ranks)-1] { + fmt.Fprintf(&row, "%d", rank) + } + lastRank = rank + + for _, value := range heavyPattern.pattern.Counts[rank] { + // convert value range to color index + color := dist.Map(value, maxBytes) + + fmt.Fprintf(&row, ",%d", color) + } + + _, err = fd.WriteString(fmt.Sprintf("%s\n", row.String())) + if err != nil { + return nil, err + } + } + + // dump gnuplot script + gnuplotFile := 
filepath.Join(outputDir, fmt.Sprintf("heavy_patterns_index%d_%s.gnuplot", i, dist.Name())) + fd, err = os.OpenFile(gnuplotFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0755) + if err != nil { + return nil, err + } + defer fd.Close() + + _, err = fd.WriteString(fmt.Sprintf(`set term png +set output "heavy_patterns_index%d_%s.png" +set title "Heat map Comm %d Top %d Occurrences %d" +set size ratio 1 +set xlabel "Send Rank" +set ylabel "Recv Rank" +unset key +set xrange [-0.5:%d.5] +set yrange [-0.5:%d.5] +set pointsize 2 +set datafile separator comma +%s +plot "heavy_patterns_index%d_%s.txt" matrix rowheaders columnheaders using 2:1:3 with image `, i, dist.Name(), heavyPattern.leadRank, i+1, heavyPattern.pattern.Occurrence, len(ranks)-1, len(ranks)-1, dist.GnuplotConfig(minBytes, maxBytes), i, dist.Name())) + if err != nil { + return nil, err + } + + gnuplotFiles = append(gnuplotFiles, gnuplotFile) + } + } + return gnuplotFiles, nil +} + +// HeavyPatterns plots the heavy patterns found during the post-mortem analysis +func HeavyPatterns(dir string, outputDir string, patterns map[int]patterns.Data) error { + gnuplotScripts, err := generateHeavyPatternsDataFiles(dir, outputDir, patterns) + if err != nil { + return fmt.Errorf("generateHeavyPatternsDataFiles() failed: %s", err) + } + + for _, gnuplotScript := range gnuplotScripts { + err = runGnuplot(gnuplotScript, outputDir) + if err != nil { + return fmt.Errorf("runGnuplot() failed: %s", err) + } + } + + return nil +} + +type Distribution interface { + Map(bytes int, maxBytes int) int + GnuplotConfig(minBytes int, maxBytes int) string + Name() string +} + +var AllDists = []Distribution{ + SimpleDistribution{}, + LinearDistribution{}, + LogarithmDistribution{}, + Linear2Distribution{}, + LinearViridisDistribution{}, + QuadraticDistribution{}, +} + +func generateAllPatternsDataFiles(dir string, outputDir string, numRanks int, allPatterns map[int]patterns.Data, locationsData []*location.Data) ([]string, error) { + // create 
numRanks x numRanks matrix + matrix := make([][]int, numRanks) + for i := 0; i < numRanks; i++ { + matrix[i] = make([]int, numRanks) + } + + // create mapping from local rank to COMM_WORLD ranks + mapping := make(map[int]map[int]int) + for _, data := range locationsData { + leadRank := data.RankLocations[0].CommWorldRank + mapping[leadRank] = make(map[int]int) + for _, loc := range data.RankLocations { + mapping[leadRank][loc.CommRank] = loc.CommWorldRank + } + } + + // sum up patterns from different communicators + for leadRank, data := range allPatterns { + for _, pattern := range data.HeavyPatterns { + for from, value := range pattern.Counts { + for to, bytes := range value { + // convert `from` and `to` to COMM_WORLD ranks + world_from := mapping[leadRank][from] + world_to := mapping[leadRank][to] + matrix[world_from][world_to] += bytes * pattern.Occurrence + } + } + } + } + + // find min/max value + maxBytes := matrix[0][0] + minBytes := matrix[0][0] + for rank := 0; rank < numRanks; rank++ { + for to := 0; to < numRanks; to++ { + if maxBytes < matrix[rank][to] { + maxBytes = matrix[rank][to] + } + if minBytes > matrix[rank][to] { + minBytes = matrix[rank][to] + } + } + } + + gnuplotFiles := make([]string, 0) + + for _, dist := range AllDists { + // dump heat map data + dataFile := filepath.Join(outputDir, fmt.Sprintf("all_patterns_%s.txt", dist.Name())) + fd, err := os.OpenFile(dataFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0755) + if err != nil { + return nil, err + } + defer fd.Close() + + // xlabels + var labels strings.Builder + for rank := 0; rank < numRanks; rank++ { + if rank == 0 || rank == numRanks-1 { + fmt.Fprintf(&labels, ",%d", rank) + } else { + fmt.Fprintf(&labels, ",") + } + } + _, err = fd.WriteString(fmt.Sprintf("%s\n", labels.String())) + if err != nil { + return nil, err + } + + // heat map matrix + for rank := 0; rank < numRanks; rank++ { + var row strings.Builder + if rank == 0 || rank == numRanks-1 { + fmt.Fprintf(&row, "%d", rank) + } + 
// distributions

// SimpleDistribution maps values onto a small set of discrete color indices,
// one per order of magnitude.
type SimpleDistribution struct{}

// Map converts value into a color index between 0 and 7:
//
//	value <= 0          -> 0
//	1 to 10             -> 1
//	11 to 100           -> 2
//	...
//	100001 to 1000000   -> 6
//	above 1000000       -> 7
//
// The index is capped at 7, the last color listed below, so that very large
// values never produce an index outside of that list. maxBytes is not used by
// this distribution.
func (d SimpleDistribution) Map(value int, maxBytes int) int {
	// 0 'white', 1 'yellow', 2 'orange', 3 'green', 4 'red', 5 'purple', 6 'brown', 7 'black'
	const lastColor = 7

	if value <= 0 {
		return 0
	}

	// One step per additional decimal order of magnitude; (value-1)/10 makes
	// exact powers of ten fall into the lower bucket.
	color := 1
	for rest := (value - 1) / 10; rest > 0 && color < lastColor; rest /= 10 {
		color++
	}
	return color
}
// https://github.com/Gnuplotting/gnuplot-palettes/blob/master/viridis.pal

// LinearViridisDistribution scales values linearly, relative to the maximum
// value, for use with the viridis palette.
type LinearViridisDistribution struct{}

// Map scales value linearly to value * 256 / maxBytes (integer division).
//
// When maxBytes is 0 there is no range to scale against (this happens when
// every value of the data set is 0), so 0 is returned instead of dividing by
// zero.
func (d LinearViridisDistribution) Map(value int, maxBytes int) int {
	if maxBytes == 0 {
		return 0
	}
	return value * 256 / maxBytes
}
+ # + # To the extent possible under law, the persons who associated CC0 with + # mpl-colormaps have waived all copyright and related or neighboring rights + # to mpl-colormaps. + # + # You should have received a copy of the CC0 legalcode along with this + # work. If not, see . + + #https://github.com/BIDS/colormap/blob/master/colormaps.py + + + # line styles + set style line 1 lt 1 lc rgb '#440154' # dark purple + set style line 2 lt 1 lc rgb '#472c7a' # purple + set style line 3 lt 1 lc rgb '#3b518b' # blue + set style line 4 lt 1 lc rgb '#2c718e' # blue + set style line 5 lt 1 lc rgb '#21908d' # blue-green + set style line 6 lt 1 lc rgb '#27ad81' # green + set style line 7 lt 1 lc rgb '#5cc863' # green + set style line 8 lt 1 lc rgb '#aadc32' # lime green + set style line 9 lt 1 lc rgb '#fde725' # yellow + + + # palette + set palette defined (\ + 0 0.267004 0.004874 0.329415,\ + 1 0.268510 0.009605 0.335427,\ + 2 0.269944 0.014625 0.341379,\ + 3 0.271305 0.019942 0.347269,\ + 4 0.272594 0.025563 0.353093,\ + 5 0.273809 0.031497 0.358853,\ + 6 0.274952 0.037752 0.364543,\ + 7 0.276022 0.044167 0.370164,\ + 8 0.277018 0.050344 0.375715,\ + 9 0.277941 0.056324 0.381191,\ + 10 0.278791 0.062145 0.386592,\ + 11 0.279566 0.067836 0.391917,\ + 12 0.280267 0.073417 0.397163,\ + 13 0.280894 0.078907 0.402329,\ + 14 0.281446 0.084320 0.407414,\ + 15 0.281924 0.089666 0.412415,\ + 16 0.282327 0.094955 0.417331,\ + 17 0.282656 0.100196 0.422160,\ + 18 0.282910 0.105393 0.426902,\ + 19 0.283091 0.110553 0.431554,\ + 20 0.283197 0.115680 0.436115,\ + 21 0.283229 0.120777 0.440584,\ + 22 0.283187 0.125848 0.444960,\ + 23 0.283072 0.130895 0.449241,\ + 24 0.282884 0.135920 0.453427,\ + 25 0.282623 0.140926 0.457517,\ + 26 0.282290 0.145912 0.461510,\ + 27 0.281887 0.150881 0.465405,\ + 28 0.281412 0.155834 0.469201,\ + 29 0.280868 0.160771 0.472899,\ + 30 0.280255 0.165693 0.476498,\ + 31 0.279574 0.170599 0.479997,\ + 32 0.278826 0.175490 0.483397,\ + 33 0.278012 0.180367 
0.486697,\ + 34 0.277134 0.185228 0.489898,\ + 35 0.276194 0.190074 0.493001,\ + 36 0.275191 0.194905 0.496005,\ + 37 0.274128 0.199721 0.498911,\ + 38 0.273006 0.204520 0.501721,\ + 39 0.271828 0.209303 0.504434,\ + 40 0.270595 0.214069 0.507052,\ + 41 0.269308 0.218818 0.509577,\ + 42 0.267968 0.223549 0.512008,\ + 43 0.266580 0.228262 0.514349,\ + 44 0.265145 0.232956 0.516599,\ + 45 0.263663 0.237631 0.518762,\ + 46 0.262138 0.242286 0.520837,\ + 47 0.260571 0.246922 0.522828,\ + 48 0.258965 0.251537 0.524736,\ + 49 0.257322 0.256130 0.526563,\ + 50 0.255645 0.260703 0.528312,\ + 51 0.253935 0.265254 0.529983,\ + 52 0.252194 0.269783 0.531579,\ + 53 0.250425 0.274290 0.533103,\ + 54 0.248629 0.278775 0.534556,\ + 55 0.246811 0.283237 0.535941,\ + 56 0.244972 0.287675 0.537260,\ + 57 0.243113 0.292092 0.538516,\ + 58 0.241237 0.296485 0.539709,\ + 59 0.239346 0.300855 0.540844,\ + 60 0.237441 0.305202 0.541921,\ + 61 0.235526 0.309527 0.542944,\ + 62 0.233603 0.313828 0.543914,\ + 63 0.231674 0.318106 0.544834,\ + 64 0.229739 0.322361 0.545706,\ + 65 0.227802 0.326594 0.546532,\ + 66 0.225863 0.330805 0.547314,\ + 67 0.223925 0.334994 0.548053,\ + 68 0.221989 0.339161 0.548752,\ + 69 0.220057 0.343307 0.549413,\ + 70 0.218130 0.347432 0.550038,\ + 71 0.216210 0.351535 0.550627,\ + 72 0.214298 0.355619 0.551184,\ + 73 0.212395 0.359683 0.551710,\ + 74 0.210503 0.363727 0.552206,\ + 75 0.208623 0.367752 0.552675,\ + 76 0.206756 0.371758 0.553117,\ + 77 0.204903 0.375746 0.553533,\ + 78 0.203063 0.379716 0.553925,\ + 79 0.201239 0.383670 0.554294,\ + 80 0.199430 0.387607 0.554642,\ + 81 0.197636 0.391528 0.554969,\ + 82 0.195860 0.395433 0.555276,\ + 83 0.194100 0.399323 0.555565,\ + 84 0.192357 0.403199 0.555836,\ + 85 0.190631 0.407061 0.556089,\ + 86 0.188923 0.410910 0.556326,\ + 87 0.187231 0.414746 0.556547,\ + 88 0.185556 0.418570 0.556753,\ + 89 0.183898 0.422383 0.556944,\ + 90 0.182256 0.426184 0.557120,\ + 91 0.180629 0.429975 0.557282,\ + 92 0.179019 
0.433756 0.557430,\ + 93 0.177423 0.437527 0.557565,\ + 94 0.175841 0.441290 0.557685,\ + 95 0.174274 0.445044 0.557792,\ + 96 0.172719 0.448791 0.557885,\ + 97 0.171176 0.452530 0.557965,\ + 98 0.169646 0.456262 0.558030,\ + 99 0.168126 0.459988 0.558082,\ + 100 0.166617 0.463708 0.558119,\ + 101 0.165117 0.467423 0.558141,\ + 102 0.163625 0.471133 0.558148,\ + 103 0.162142 0.474838 0.558140,\ + 104 0.160665 0.478540 0.558115,\ + 105 0.159194 0.482237 0.558073,\ + 106 0.157729 0.485932 0.558013,\ + 107 0.156270 0.489624 0.557936,\ + 108 0.154815 0.493313 0.557840,\ + 109 0.153364 0.497000 0.557724,\ + 110 0.151918 0.500685 0.557587,\ + 111 0.150476 0.504369 0.557430,\ + 112 0.149039 0.508051 0.557250,\ + 113 0.147607 0.511733 0.557049,\ + 114 0.146180 0.515413 0.556823,\ + 115 0.144759 0.519093 0.556572,\ + 116 0.143343 0.522773 0.556295,\ + 117 0.141935 0.526453 0.555991,\ + 118 0.140536 0.530132 0.555659,\ + 119 0.139147 0.533812 0.555298,\ + 120 0.137770 0.537492 0.554906,\ + 121 0.136408 0.541173 0.554483,\ + 122 0.135066 0.544853 0.554029,\ + 123 0.133743 0.548535 0.553541,\ + 124 0.132444 0.552216 0.553018,\ + 125 0.131172 0.555899 0.552459,\ + 126 0.129933 0.559582 0.551864,\ + 127 0.128729 0.563265 0.551229,\ + 128 0.127568 0.566949 0.550556,\ + 129 0.126453 0.570633 0.549841,\ + 130 0.125394 0.574318 0.549086,\ + 131 0.124395 0.578002 0.548287,\ + 132 0.123463 0.581687 0.547445,\ + 133 0.122606 0.585371 0.546557,\ + 134 0.121831 0.589055 0.545623,\ + 135 0.121148 0.592739 0.544641,\ + 136 0.120565 0.596422 0.543611,\ + 137 0.120092 0.600104 0.542530,\ + 138 0.119738 0.603785 0.541400,\ + 139 0.119512 0.607464 0.540218,\ + 140 0.119423 0.611141 0.538982,\ + 141 0.119483 0.614817 0.537692,\ + 142 0.119699 0.618490 0.536347,\ + 143 0.120081 0.622161 0.534946,\ + 144 0.120638 0.625828 0.533488,\ + 145 0.121380 0.629492 0.531973,\ + 146 0.122312 0.633153 0.530398,\ + 147 0.123444 0.636809 0.528763,\ + 148 0.124780 0.640461 0.527068,\ + 149 0.126326 0.644107 
0.525311,\ + 150 0.128087 0.647749 0.523491,\ + 151 0.130067 0.651384 0.521608,\ + 152 0.132268 0.655014 0.519661,\ + 153 0.134692 0.658636 0.517649,\ + 154 0.137339 0.662252 0.515571,\ + 155 0.140210 0.665859 0.513427,\ + 156 0.143303 0.669459 0.511215,\ + 157 0.146616 0.673050 0.508936,\ + 158 0.150148 0.676631 0.506589,\ + 159 0.153894 0.680203 0.504172,\ + 160 0.157851 0.683765 0.501686,\ + 161 0.162016 0.687316 0.499129,\ + 162 0.166383 0.690856 0.496502,\ + 163 0.170948 0.694384 0.493803,\ + 164 0.175707 0.697900 0.491033,\ + 165 0.180653 0.701402 0.488189,\ + 166 0.185783 0.704891 0.485273,\ + 167 0.191090 0.708366 0.482284,\ + 168 0.196571 0.711827 0.479221,\ + 169 0.202219 0.715272 0.476084,\ + 170 0.208030 0.718701 0.472873,\ + 171 0.214000 0.722114 0.469588,\ + 172 0.220124 0.725509 0.466226,\ + 173 0.226397 0.728888 0.462789,\ + 174 0.232815 0.732247 0.459277,\ + 175 0.239374 0.735588 0.455688,\ + 176 0.246070 0.738910 0.452024,\ + 177 0.252899 0.742211 0.448284,\ + 178 0.259857 0.745492 0.444467,\ + 179 0.266941 0.748751 0.440573,\ + 180 0.274149 0.751988 0.436601,\ + 181 0.281477 0.755203 0.432552,\ + 182 0.288921 0.758394 0.428426,\ + 183 0.296479 0.761561 0.424223,\ + 184 0.304148 0.764704 0.419943,\ + 185 0.311925 0.767822 0.415586,\ + 186 0.319809 0.770914 0.411152,\ + 187 0.327796 0.773980 0.406640,\ + 188 0.335885 0.777018 0.402049,\ + 189 0.344074 0.780029 0.397381,\ + 190 0.352360 0.783011 0.392636,\ + 191 0.360741 0.785964 0.387814,\ + 192 0.369214 0.788888 0.382914,\ + 193 0.377779 0.791781 0.377939,\ + 194 0.386433 0.794644 0.372886,\ + 195 0.395174 0.797475 0.367757,\ + 196 0.404001 0.800275 0.362552,\ + 197 0.412913 0.803041 0.357269,\ + 198 0.421908 0.805774 0.351910,\ + 199 0.430983 0.808473 0.346476,\ + 200 0.440137 0.811138 0.340967,\ + 201 0.449368 0.813768 0.335384,\ + 202 0.458674 0.816363 0.329727,\ + 203 0.468053 0.818921 0.323998,\ + 204 0.477504 0.821444 0.318195,\ + 205 0.487026 0.823929 0.312321,\ + 206 0.496615 0.826376 
0.306377,\ + 207 0.506271 0.828786 0.300362,\ + 208 0.515992 0.831158 0.294279,\ + 209 0.525776 0.833491 0.288127,\ + 210 0.535621 0.835785 0.281908,\ + 211 0.545524 0.838039 0.275626,\ + 212 0.555484 0.840254 0.269281,\ + 213 0.565498 0.842430 0.262877,\ + 214 0.575563 0.844566 0.256415,\ + 215 0.585678 0.846661 0.249897,\ + 216 0.595839 0.848717 0.243329,\ + 217 0.606045 0.850733 0.236712,\ + 218 0.616293 0.852709 0.230052,\ + 219 0.626579 0.854645 0.223353,\ + 220 0.636902 0.856542 0.216620,\ + 221 0.647257 0.858400 0.209861,\ + 222 0.657642 0.860219 0.203082,\ + 223 0.668054 0.861999 0.196293,\ + 224 0.678489 0.863742 0.189503,\ + 225 0.688944 0.865448 0.182725,\ + 226 0.699415 0.867117 0.175971,\ + 227 0.709898 0.868751 0.169257,\ + 228 0.720391 0.870350 0.162603,\ + 229 0.730889 0.871916 0.156029,\ + 230 0.741388 0.873449 0.149561,\ + 231 0.751884 0.874951 0.143228,\ + 232 0.762373 0.876424 0.137064,\ + 233 0.772852 0.877868 0.131109,\ + 234 0.783315 0.879285 0.125405,\ + 235 0.793760 0.880678 0.120005,\ + 236 0.804182 0.882046 0.114965,\ + 237 0.814576 0.883393 0.110347,\ + 238 0.824940 0.884720 0.106217,\ + 239 0.835270 0.886029 0.102646,\ + 240 0.845561 0.887322 0.099702,\ + 241 0.855810 0.888601 0.097452,\ + 242 0.866013 0.889868 0.095953,\ + 243 0.876168 0.891125 0.095250,\ + 244 0.886271 0.892374 0.095374,\ + 245 0.896320 0.893616 0.096335,\ + 246 0.906311 0.894855 0.098125,\ + 247 0.916242 0.896091 0.100717,\ + 248 0.926106 0.897330 0.104071,\ + 249 0.935904 0.898570 0.108131,\ + 250 0.945636 0.899815 0.112838,\ + 251 0.955300 0.901065 0.118128,\ + 252 0.964894 0.902323 0.123941,\ + 253 0.974417 0.903590 0.130215,\ + 254 0.983868 0.904867 0.136897,\ + 255 0.993248 0.906157 0.143936) + `) + fmt.Fprintf(&b, "set cbrange [0:255]\n") + if maxBytes > 2 { + fmt.Fprintf(&b, "set cbtics ('0' 0, '%d' 128, '%d' 255)\n", maxBytes/2, maxBytes) + } else { + fmt.Fprintf(&b, "set cbtics ('0' 0, '%d' 255)\n", maxBytes) + } + return b.String() +} + +func (d 
// LogarithmDistribution plots raw byte counts on a logarithmic color scale.
type LogarithmDistribution struct{}

// Map returns value unchanged: the logarithmic scaling is not applied to the
// data but requested from gnuplot through "set logscale cb" (see
// GnuplotConfig).
func (d LogarithmDistribution) Map(value int, maxBytes int) int {
	return value
}

// GnuplotConfig returns the gnuplot commands configuring a white-to-black
// palette, a logarithmic color box ranging over [1:maxBytes], and one tick
// for 1, for every power of ten strictly below maxBytes, and for maxBytes
// itself. minBytes is not used.
func (d LogarithmDistribution) GnuplotConfig(minBytes int, maxBytes int) string {
	var b strings.Builder
	fmt.Fprintf(&b, "set palette defined( 1 'white', %d 'black' )\n", maxBytes)
	fmt.Fprintf(&b, "set logscale cb\n")
	fmt.Fprintf(&b, "set cbrange [1:%d]\n", maxBytes)
	// generate ticks
	fmt.Fprintf(&b, "set cbtics (1")
	for val := 10; val < maxBytes; val *= 10 {
		fmt.Fprintf(&b, ", %d", val)
		// Stop before val*10 can overflow int: once val exceeds maxBytes/10
		// the next power of ten is above maxBytes anyway, so the emitted
		// ticks are unchanged. Without this guard a huge maxBytes makes val
		// wrap around (eventually to 0) and the loop never terminates.
		if val > maxBytes/10 {
			break
		}
	}
	fmt.Fprintf(&b, ", %d)\n", maxBytes)
	return b.String()
}

// Name returns the identifier of the distribution.
func (d LogarithmDistribution) Name() string {
	return "logarithm"
}

// QuadraticDistribution maps byte counts to a 0..255 color index that grows
// with the square root of the value, i.e. value = maxBytes * (index/256)^2.
type QuadraticDistribution struct{}

// Map returns the color index of value relative to maxBytes:
// 256 * sqrt(value) / sqrt(maxBytes), truncated to an int.
//
// The result is clamped to 255 so that it stays inside the [0:255] color
// range set by GnuplotConfig (the raw formula yields 256 when value equals
// maxBytes). Non-positive value or maxBytes yield 0 instead of converting a
// NaN/Inf to int.
func (d QuadraticDistribution) Map(value int, maxBytes int) int {
	if value <= 0 || maxBytes <= 0 {
		return 0
	}
	index := int(256.0 * math.Sqrt(float64(value)) / math.Sqrt(float64(maxBytes)))
	if index > 255 {
		return 255
	}
	return index
}

// GnuplotConfig returns the gnuplot commands defining a 256-entry gray
// palette (gray level 1 - sqrt(i/256) for entry i), the [0:255] color range,
// and the color box ticks. minBytes is not used.
func (d QuadraticDistribution) GnuplotConfig(minBytes int, maxBytes int) string {
	var b strings.Builder
	fmt.Fprintf(&b, "set palette defined(\\\n")
	for i := 0; i <= 255; i++ {
		color := 1.0 - math.Sqrt(float64(i)/256.0)
		if i == 255 {
			fmt.Fprintf(&b, "%d %f %f %f)\n", i, color, color, color)
		} else {
			fmt.Fprintf(&b, "%d %f %f %f,\\\n", i, color, color, color)
		}
	}
	fmt.Fprintf(&b, "set cbrange [0:255]\n")
	// generate ticks
	// Tick labels are the byte counts that Map sends to each tick position:
	// inverting index = 256*sqrt(v/maxBytes) gives v = maxBytes*(index/256)^2,
	// hence maxBytes/16 at index 64 and maxBytes/4 at index 128.
	fmt.Fprintf(&b, "set cbtics ('0' 0, '%d' 64, '%d' 128, '%d' 255)\n", maxBytes/16, maxBytes/4, maxBytes)
	return b.String()
}

// Name returns the identifier of the distribution.
func (d QuadraticDistribution) Name() string {
	return "quadratic"
}
fmt.Errorf("generateAllPatternsDataFiles() failed: %s", err) + } + + for _, gnuplotScript := range gnuplotScripts { + err = runGnuplot(gnuplotScript, outputDir) + if err != nil { + return fmt.Errorf("runGnuplot() failed: %s", err) + } + } + + return nil +} diff --git a/tools/internal/pkg/profiler/profiler.go b/tools/internal/pkg/profiler/profiler.go index 7f012be7..8dc34b82 100644 --- a/tools/internal/pkg/profiler/profiler.go +++ b/tools/internal/pkg/profiler/profiler.go @@ -891,6 +891,7 @@ type step1ResultsT struct { type step3ResultsT struct { rankFileData map[int]*location.RankFileData callMaps map[int]maps.CallsDataT + locationData []*location.Data globalSendHeatMap map[int]int globalRecvHeatMap map[int]int rankNumCallsMap map[int]int @@ -1010,7 +1011,7 @@ func (cfg *PostmortemConfig) Analyze() error { } t := timer.Start() resultsStep3 = new(step3ResultsT) - resultsStep3.rankFileData, resultsStep3.callMaps, resultsStep3.globalSendHeatMap, resultsStep3.globalRecvHeatMap, resultsStep3.rankNumCallsMap, err = maps.Create(cfg.CodeBaseDir, cfg.CollectiveName, maps.Heat, cfg.DatasetDir, resultsStep1.allCallsData) + resultsStep3.rankFileData, resultsStep3.callMaps, resultsStep3.locationData, resultsStep3.globalSendHeatMap, resultsStep3.globalRecvHeatMap, resultsStep3.rankNumCallsMap, err = maps.Create(cfg.CodeBaseDir, cfg.CollectiveName, maps.Heat, cfg.DatasetDir, resultsStep1.allCallsData) if err != nil { return fmt.Errorf("unable to create heat map: %s", err) } @@ -1118,6 +1119,19 @@ func (cfg *PostmortemConfig) Analyze() error { if err != nil { return fmt.Errorf("unable to plot average data: %s", err) } + + fmt.Printf("* Generating graphs for heavy patterns\n") + err = plot.HeavyPatterns(cfg.DatasetDir, cfg.DatasetDir, resultsStep1.allPatterns) + if err != nil { + return fmt.Errorf("unable to plot heavy patterns data: %s", err) + } + + fmt.Printf("* Generating graphs for sum of all patterns\n") + err = plot.AllPatterns(cfg.DatasetDir, cfg.DatasetDir, 
// FindHeavyPatternsFile walks dir recursively and returns, sorted in
// ascending order and without duplicates, the index N of every entry whose
// base name starts with "heavy_patterns_index", ends with ".png" and carries
// an integer N right after the prefix.
//
// Matching names from which no integer can be parsed are skipped instead of
// being reported as index 0. If the walk fails, the error is returned with a
// nil slice; on success the slice is non-nil, possibly empty.
func FindHeavyPatternsFile(dir string) ([]int, error) {
	indexes := make(map[int]struct{})
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		filename := filepath.Base(path)
		if !strings.HasPrefix(filename, "heavy_patterns_index") || !strings.HasSuffix(filename, ".png") {
			return nil
		}
		var index int
		// Only the number of parsed items matters here: Sscanf reports 1 as
		// soon as the integer itself was read, and 0 when the name holds no
		// usable index.
		if n, _ := fmt.Sscanf(filename, "heavy_patterns_index%d.png", &index); n != 1 {
			return nil
		}
		indexes[index] = struct{}{}
		return nil
	})
	if err != nil {
		return nil, err
	}

	res := make([]int, 0, len(indexes))
	for index := range indexes {
		res = append(res, index)
	}
	sort.Ints(res)

	return res, nil
}
internalUnitData := getDataUnits() _, ok := internalUnitData[unitScale+1] - return ok + return !ok case TIME: internalUnitData := getTimeUnits() _, ok := internalUnitData[unitScale+1] - return ok + return !ok case BW: internalUnitData := getBWUnits() _, ok := internalUnitData[unitScale+1] - return ok + return !ok } - return false + return true } // IsMin checks if a unit can be scaled down further diff --git a/tools/internal/pkg/webui/templates/allPatterns.html b/tools/internal/pkg/webui/templates/allPatterns.html new file mode 100644 index 00000000..d419b1d5 --- /dev/null +++ b/tools/internal/pkg/webui/templates/allPatterns.html @@ -0,0 +1,45 @@ +

Sum of All Patterns

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Simple + +
Linear Viridis + +
Linear + +
+ Linear2 + + +
+ Logarithm + + +
+ Quadratic + + +
\ No newline at end of file diff --git a/tools/internal/pkg/webui/templates/callsLayout.html b/tools/internal/pkg/webui/templates/callsLayout.html index f56e466e..5e0610b1 100644 --- a/tools/internal/pkg/webui/templates/callsLayout.html +++ b/tools/internal/pkg/webui/templates/callsLayout.html @@ -91,8 +91,10 @@

Profile: {{ .PageTitle }}

+ + + \ No newline at end of file diff --git a/tools/internal/pkg/webui/templates/index.html b/tools/internal/pkg/webui/templates/index.html index e1897e42..31132c94 100644 --- a/tools/internal/pkg/webui/templates/index.html +++ b/tools/internal/pkg/webui/templates/index.html @@ -51,6 +51,8 @@
+ +
@@ -60,6 +62,14 @@
+ +
+ +
+ +
+ +