Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/cmd/map/map.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func main() {
collectiveName := "alltoallv" // hardcoded for now, detection coming soon

// We do not care about the data returned by Create, we only care here about the files that are generated.
_, _, _, _, _, err := maps.Create(codeBaseDir, collectiveName, maps.Heat, *dir, nil)
_, _, _, _, _, _, err := maps.Create(codeBaseDir, collectiveName, maps.Heat, *dir, nil)
if err != nil {
fmt.Printf("ERROR: unable to create heat map: %s", err)
os.Exit(1)
Expand Down
6 changes: 6 additions & 0 deletions tools/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,10 @@ github.com/gomarkdown/markdown v0.0.0-20200609195525-3f9352745725 h1:X6sZdr+t2E2
github.com/gomarkdown/markdown v0.0.0-20200609195525-3f9352745725/go.mod h1:aii0r/K0ZnHv7G0KF7xy1v0A7s2Ljrb5byB7MO5p6TU=
github.com/gvallee/go_util v1.0.1 h1:Ch/PpAlHrHNmL2Upaxif/Nt4CqtaazDyTXh5fIhutJo=
github.com/gvallee/go_util v1.0.1/go.mod h1:fTexpwdH/n05Ziu0TXJIQsr7E+46QpBxNdeOOsyC0/s=
github.com/lucasb-eyer/go-colorful v1.0.3 h1:QIbQXiugsb+q10B+MI+7DI1oQLdmnep86tWFlaaUAac=
github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mazznoer/colorgrad v0.8.1 h1:Bw/ks+KujOOg9E6YQvPqSqTLryiFnwliAH5VMZarSTI=
github.com/mazznoer/colorgrad v0.8.1/go.mod h1:xCjvoNkXHJIAPOUMSMrXkFdxTGQqk8zMYS3e5hSLghA=
github.com/mazznoer/csscolorparser v0.1.0 h1:xUf1uzU1r24JleIIb2Kz3bl7vATStxy53gm67yuPP+c=
github.com/mazznoer/csscolorparser v0.1.0/go.mod h1:Aj22+L/rYN/Y6bj3bYqO3N6g1dtdHtGfQ32xZ5PJQic=
golang.org/dl v0.0.0-20190829154251-82a15e2f2ead/go.mod h1:IUMfjQLJQd4UTqG1Z90tenwKoCX93Gn3MAQJMOSBsDQ=
74 changes: 52 additions & 22 deletions tools/internal/pkg/maps/maps.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,16 @@ func saveGlobalHeatMap(codeBaseDir string, heatmap map[int]int, filepath string)
}
}
defer fd.Close()
for key, value := range heatmap {

// sort heatmap by rank
var sortedKey []int
for k := range heatmap {
sortedKey = append(sortedKey, k)
}
sort.Ints(sortedKey)

for _, key := range sortedKey {
value := heatmap[key]
_, err := fd.WriteString(fmt.Sprintf("Rank %d: %d bytes\n", key, value))
if err != nil {
return err
Expand Down Expand Up @@ -176,7 +185,15 @@ func saveHostHeatMap(codeBaseDir string, heatMap map[string]int, filepath string
return err
}

for key, value := range heatMap {
// sort heatMap by key
keys := make([]string, 0)
for k := range heatMap {
keys = append(keys, k)
}
sort.Strings(keys)

for _, key := range keys {
value := heatMap[key]
_, err := fd.WriteString(fmt.Sprintf("Host %s: %d bytes\n", key, value))
if err != nil {
return err
Expand Down Expand Up @@ -341,42 +358,43 @@ func createHeatMap(codeBaseDir string, collectiveName string, dir string, leadRa
return nil
}

// commCreate prepares the rank maps from dir (via prepareRanksMap), creates the
// heat map for the communicator led by leadRank (via createHeatMap), and then
// writes the global send and receive heat maps into dir as
// GlobalHeatMapPrefix+"-send.md" and GlobalHeatMapPrefix+"-recv.md".
// It returns the rank file data, the location data obtained from
// prepareRanksMap, and the CallsDataT of the communicator. When a step after
// prepareRanksMap fails, the data gathered so far is returned with the error.
// NOTE(review): globalSendHeatMap, globalRecvHeatMap and rankNumCallsMap are
// handed to createHeatMap and look like accumulators shared across calls — confirm.
func commCreate(codeBaseDir string, collectiveName string, dir string, leadRank int, allCallsData map[int]*counts.CallData, globalSendHeatMap map[int]int, globalRecvHeatMap map[int]int, rankNumCallsMap map[int]int) (*location.RankFileData, CallsDataT, error) {
func commCreate(codeBaseDir string, collectiveName string, dir string, leadRank int, allCallsData map[int]*counts.CallData, globalSendHeatMap map[int]int, globalRecvHeatMap map[int]int, rankNumCallsMap map[int]int) (*location.RankFileData, []*location.Data, CallsDataT, error) {
commMaps := CallsDataT{
SendHeatMap: map[int]map[int]int{},
RecvHeatMap: map[int]map[int]int{},
}
var rankFileData *location.RankFileData
var rankData []*location.Data
var err error
rankFileData, _, commMaps.RanksMap, err = prepareRanksMap(codeBaseDir, dir)
rankFileData, _, commMaps.RanksMap, rankData, err = prepareRanksMap(codeBaseDir, dir)
if err != nil {
return nil, commMaps, err
return nil, nil, commMaps, err
}

err = createHeatMap(codeBaseDir, collectiveName, dir, leadRank, rankFileData, allCallsData, &commMaps, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap)
if err != nil {
return rankFileData, commMaps, err
return rankFileData, rankData, commMaps, err
}

// Save the heat maps for the entire execution
globalSendHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-send.md")
err = saveGlobalHeatMap(codeBaseDir, globalSendHeatMap, globalSendHeatMapFilePath)
if err != nil {
return rankFileData, commMaps, err
return rankFileData, rankData, commMaps, err
}

globalRecvHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-recv.md")
err = saveGlobalHeatMap(codeBaseDir, globalRecvHeatMap, globalRecvHeatMapFilePath)
if err != nil {
return rankFileData, commMaps, err
return rankFileData, rankData, commMaps, err
}

return rankFileData, commMaps, nil
return rankFileData, rankData, commMaps, nil
}

// Create is the main function to create heat maps. The id identifies what type of maps
// need to be created.
func Create(codeBaseDir string, collectiveName string, id int, dir string, allCallsData []counts.CommDataT) (map[int]*location.RankFileData, map[int]CallsDataT, map[int]int, map[int]int, map[int]int, error) {
func Create(codeBaseDir string, collectiveName string, id int, dir string, allCallsData []counts.CommDataT) (map[int]*location.RankFileData, map[int]CallsDataT, []*location.Data, map[int]int, map[int]int, map[int]int, error) {
switch id {
case Heat:
var err error
Expand All @@ -385,33 +403,35 @@ func Create(codeBaseDir string, collectiveName string, id int, dir string, allCa
globalCallsData := make(map[int]CallsDataT)
// fixme: RankFileData is supposed to be static and dealing with ranks on comm world, no need to track per lead rank
globalCommRankFileData := make(map[int]*location.RankFileData)
// all calls have the same location data
globalCommData := make([]*location.Data, 0)
globalSendHeatMap := make(map[int]int) // The comm world rank is the key, the value amount of data sent to it
globalRecvHeatMap := make(map[int]int)

for _, commData := range allCallsData {
globalCommRankFileData[commData.LeadRank], globalCallsData[commData.LeadRank], err = commCreate(codeBaseDir, collectiveName, dir, commData.LeadRank, commData.CallData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap)
globalCommRankFileData[commData.LeadRank], globalCommData, globalCallsData[commData.LeadRank], err = commCreate(codeBaseDir, collectiveName, dir, commData.LeadRank, commData.CallData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap)
if err != nil {
return nil, nil, nil, nil, nil, err
return nil, nil, nil, nil, nil, nil, err
}
}

// Save the heat maps for the entire execution
globalSendHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-send.md")
err = saveGlobalHeatMap(codeBaseDir, globalSendHeatMap, globalSendHeatMapFilePath)
if err != nil {
return nil, nil, nil, nil, nil, err
return nil, nil, nil, nil, nil, nil, err
}

globalRecvHeatMapFilePath := filepath.Join(dir, GlobalHeatMapPrefix+"-recv.md")
err = saveGlobalHeatMap(codeBaseDir, globalRecvHeatMap, globalRecvHeatMapFilePath)
if err != nil {
return nil, nil, nil, nil, nil, err
return nil, nil, nil, nil, nil, nil, err
}

return globalCommRankFileData, globalCallsData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap, nil
return globalCommRankFileData, globalCallsData, globalCommData, globalSendHeatMap, globalRecvHeatMap, rankNumCallsMap, nil
}

return nil, nil, nil, nil, nil, fmt.Errorf("unknown map type: %d", id)
return nil, nil, nil, nil, nil, nil, fmt.Errorf("unknown map type: %d", id)
}

func saveProcessedLocationData(dir string, leadRank int, info map[int]int) error {
Expand Down Expand Up @@ -481,7 +501,15 @@ func createRankFile(dir string, hm *location.RankFileData) error {
return err
}

for host, rankList := range hm.HostMap {
// sort hm.HostMap by key
keys := make([]string, 0)
for k := range hm.HostMap {
keys = append(keys, k)
}
sort.Strings(keys)

for _, host := range keys {
rankList := hm.HostMap[host]
sort.Ints(rankList)
_, err = fd.WriteString(fmt.Sprintf("Host %s - %d ranks: %s\n", host, len(rankList), notation.CompressIntArray(rankList)))
if err != nil {
Expand All @@ -492,7 +520,7 @@ func createRankFile(dir string, hm *location.RankFileData) error {
return nil
}

func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, map[int][]*location.RankLocation, map[int]map[int]int, error) {
func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, map[int][]*location.RankLocation, map[int]map[int]int, []*location.Data, error) {
callMap := make(map[int][]*location.RankLocation)
callsRanksMap := make(map[int]map[int]int)
// This is to track the files for a specific communicator
Expand All @@ -503,7 +531,7 @@ func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, ma
// Find all the location files
f, err := ioutil.ReadDir(dir)
if err != nil {
return nil, nil, nil, err
return nil, nil, nil, nil, err
}
var locationFiles []string
for _, file := range f {
Expand All @@ -513,13 +541,15 @@ func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, ma
locationFiles = append(locationFiles, filepath.Join(dir, filename))
}
}
locationsDataList := make([]*location.Data, 0)

// Parse each file and aggregate the results from each file.
for _, locationFile := range locationFiles {
callsData, locationsData, err := location.ParseLocationFile(codeBaseDir, locationFile)
if err != nil {
return nil, nil, nil, err
return nil, nil, nil, nil, err
}
locationsDataList = append(locationsDataList, locationsData)
for callID := range callsData {
if _, ok := callsRanksMap[callID]; !ok {
// Transform the array of locations into a map
Expand Down Expand Up @@ -547,10 +577,10 @@ func prepareRanksMap(codeBaseDir string, dir string) (*location.RankFileData, ma

err = createRankFile(dir, hm)
if err != nil {
return hm, nil, nil, err
return hm, nil, nil, nil, err
}

return hm, callMap, callsRanksMap, nil
return hm, callMap, callsRanksMap, locationsDataList, nil
}

// CreateAvgMaps uses the send and receive counts to create an average heat map of the data that is sent/received
Expand Down
62 changes: 62 additions & 0 deletions tools/internal/pkg/patterns/patterns.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"os"
"path/filepath"
"reflect"
"sort"
"strings"

"github.com/gvallee/alltoallv_profiling/tools/internal/pkg/counts"
Expand All @@ -34,6 +35,18 @@ type CallData struct {
Calls []int
}

// HeavyPattern describes one distinct set of counts together with the number
// of calls in which that set was observed.
type HeavyPattern struct {
// Occurrence is the number of calls that share this pattern
Occurrence int

// RawCounts is the raw string representation of the counts
RawCounts string

// Counts are the counts for all ranks involved in the operation
// The key is the rank sending/receiving the data and the value an array of integers representing counts for each destination/source
Counts map[int][]int
}

// Data holds the data all the patterns the infrastructure was able to detect
type Data struct {
// AllPatterns is the data for all the patterns that have been detected
Expand All @@ -50,6 +63,19 @@ type Data struct {

// Empty is the data of all the patterns that do not exchange any data (all counts are equal to 0)
Empty []*CallData

// HeavyPatterns is the list of patterns sorted by occurrence
HeavyPatterns []HeavyPattern
}

// rawCountsToKey flattens the raw count lines into a single string so that
// the slice can be used as a map key. Each element is written in order and
// is followed by a newline; a nil or empty slice yields the empty string.
func rawCountsToKey(counts []string) string {
	var key strings.Builder
	for idx := range counts {
		// Fprintln on a single string emits the string plus a trailing newline.
		fmt.Fprintln(&key, counts[idx])
	}
	return key.String()
}

func CompareCallPatterns(p1 map[int]int, p2 map[int]int) bool {
Expand Down Expand Up @@ -189,6 +215,11 @@ func GetSummaryFilePath(basedir string, jobid int, rank int) string {
return filepath.Join(basedir, fmt.Sprintf("%sjob%d-rank%d.md", SummaryFilePrefix, jobid, rank))
}

// GetHeavyFilePath returns the full path to the heavy pattern file, i.e., the
// file named "heavy-patterns.md" located directly under basedir.
func GetHeavyFilePath(basedir string) string {
	// The file name is a constant without formatting verbs or arguments, so
	// it is passed to filepath.Join directly instead of through fmt.Sprintf.
	return filepath.Join(basedir, "heavy-patterns.md")
}

func getPatterns(reader *bufio.Reader) (string, error) {
patterns := ""

Expand Down Expand Up @@ -496,6 +527,10 @@ func ParseFiles(sendCountsFile string, recvCountsFile string, numCalls int, rank
return nil, patterns, fmt.Errorf("counts.LoadCallsData() did not return any data")
}

// collect heavy patterns
// the key is the pattern's RawCounts string
var heavyPattern = make(map[string]HeavyPattern)

b := progress.NewBar(numCalls, "Analyzing alltoallv calls")
defer progress.EndBar(b)
for i := 0; i < numCalls; i++ {
Expand All @@ -518,6 +553,20 @@ func ParseFiles(sendCountsFile string, recvCountsFile string, numCalls int, rank
return nil, patterns, fmt.Errorf("no recv patterns available")
}

// Analyze heavy pattern
key := rawCountsToKey(callData[i].SendData.RawCounts)
pattern, ok := heavyPattern[key]
if !ok {
// does not exist
pattern.RawCounts = key
for _, counts := range callData[i].SendData.Counts {
// keep a single entry (the loop has no break, so the last one visited wins); all entries are expected to hold the same values
pattern.Counts = counts
}
}
pattern.Occurrence += 1
heavyPattern[key] = pattern

// Analyze the send/receive pattern from the call
err := patterns.addPattern(i, callData[i].SendData.Statistics.Patterns, callData[i].RecvData.Statistics.Patterns)
if err != nil {
Expand All @@ -537,6 +586,18 @@ func ParseFiles(sendCountsFile string, recvCountsFile string, numCalls int, rank
return nil, patterns, fmt.Errorf("extracted data of %d calls instead of %d", len(callData), numCalls)
}

// sort heavy patterns by occurrence
for _, val := range heavyPattern {
patterns.HeavyPatterns = append(patterns.HeavyPatterns, val)
}
sort.Slice(patterns.HeavyPatterns, func(i, j int) bool {
if patterns.HeavyPatterns[i].Occurrence != patterns.HeavyPatterns[j].Occurrence {
return patterns.HeavyPatterns[i].Occurrence > patterns.HeavyPatterns[j].Occurrence
} else {
return patterns.HeavyPatterns[i].RawCounts > patterns.HeavyPatterns[j].RawCounts
}
})

return callData, patterns, nil
}

Expand All @@ -555,6 +616,7 @@ func WriteData(patternsFd *os.File, patternsSummaryFd *os.File, patternsData Dat
num++
}

// patterns summary
if !NoSummary(patternsData) {
if len(patternsData.OneToN) != 0 {
_, err := patternsSummaryFd.WriteString("# 1 to N patterns\n\n")
Expand Down
Loading