From 72dc6606ffefd59db2419295c0ebd8105117bbe0 Mon Sep 17 00:00:00 2001 From: mhouston Date: Fri, 17 Jul 2015 10:59:16 -0700 Subject: [PATCH 1/2] Fix outside loop to loop for full tree depth --- src/caffe/parallel.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index a39a2319bce..75342734252 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -119,8 +119,11 @@ void DevicePair::compute(const vector devices, vector* pairs) { #ifndef CPU_ONLY vector remaining(devices); - // Group GPUs by board - some boards can have more than 2 ASICs - for (int d = 0; d < remaining.size(); ++d) { + // Depth for reduction tree + int remaining_depth = (int)ceil(log2(remaining.size())); + + // Group GPUs by board + for (int d = 0; d < remaining_depth; ++d) { for (int i = 0; i < remaining.size(); ++i) { for (int j = i + 1; j < remaining.size(); ++j) { cudaDeviceProp a, b; @@ -144,8 +147,9 @@ void DevicePair::compute(const vector devices, vector* pairs) { } DLOG(INFO) << "GPUs paired by boards, remaining: " << s.str(); - // Group by P2P accessibility - P2P group can be larger than 4 boards - for (int d = 0; d < remaining.size(); ++d) { + // Group by P2P accessibility + remaining_depth = ceil(log2(remaining.size())); + for (int d = 0; d < remaining_depth; ++d) { for (int i = 0; i < remaining.size(); ++i) { for (int j = i + 1; j < remaining.size(); ++j) { int access; @@ -169,18 +173,19 @@ void DevicePair::compute(const vector devices, vector* pairs) { DLOG(INFO) << "GPUs paired by P2P access, remaining: " << s.str(); // Group remaining - for (int d = 0; d < remaining.size(); ++d) { // try to pair everyone + remaining_depth = ceil(log2(remaining.size())); + for (int d = 0; d < remaining_depth; ++d) { for (int i = 0; i < remaining.size(); ++i) { - for (int j = i + 1; j < remaining.size(); ++j) { - pairs->push_back(DevicePair(remaining[i], remaining[j])); + pairs->push_back(DevicePair(remaining[i], remaining[i+1])); DLOG(INFO) << "Remaining pair: " << remaining[i] - << ":" << remaining[j]; - remaining.erase(remaining.begin() + j); - break; - } + << ":" << remaining[i+1]; + remaining.erase(remaining.begin() + i+1); } } + + // Should only be the parent node remaining CHECK_EQ(remaining.size(), 1); + pairs->insert(pairs->begin(), DevicePair(-1, remaining[0])); CHECK(pairs->size() == devices.size()); From c21420ccd9ee3b1221435b0a78a9354a161025ec Mon Sep 17 00:00:00 2001 From: mhouston Date: Mon, 20 Jul 2015 13:36:40 -0700 Subject: [PATCH 2/2] Fix lint errors --- src/caffe/parallel.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index 75342734252..f32818060c7 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -120,9 +120,9 @@ void DevicePair::compute(const vector devices, vector* pairs) { vector remaining(devices); // Depth for reduction tree - int remaining_depth = (int)ceil(log2(remaining.size())); + int remaining_depth = static_cast(ceil(log2(remaining.size()))); - // Group GPUs by board + // Group GPUs by board for (int d = 0; d < remaining_depth; ++d) { for (int i = 0; i < remaining.size(); ++i) { for (int j = i + 1; j < remaining.size(); ++j) { @@ -174,8 +174,8 @@ void DevicePair::compute(const vector devices, vector* pairs) { // Group remaining remaining_depth = ceil(log2(remaining.size())); - for (int d = 0; d < remaining_depth; ++d) { - for (int i = 0; i < remaining.size(); ++i) { + for (int d = 0; d < remaining_depth; ++d) { + for (int i = 0; i < remaining.size(); ++i) { pairs->push_back(DevicePair(remaining[i], remaining[i+1])); DLOG(INFO) << "Remaining pair: " << remaining[i] << ":" << remaining[i+1]; @@ -185,7 +185,7 @@ void DevicePair::compute(const vector devices, vector* pairs) { // Should only be the parent node remaining CHECK_EQ(remaining.size(), 1); - + pairs->insert(pairs->begin(), DevicePair(-1, remaining[0])); CHECK(pairs->size() == devices.size());