diff --git a/include/flucoma/algorithms/public/MLP.hpp b/include/flucoma/algorithms/public/MLP.hpp index 57ce6edb..135a8957 100644 --- a/include/flucoma/algorithms/public/MLP.hpp +++ b/include/flucoma/algorithms/public/MLP.hpp @@ -10,14 +10,15 @@ under the European Union’s Horizon 2020 research and innovation programme #pragma once +#include "../util/EigenRandom.hpp" #include "../util/FluidEigenMappings.hpp" #include "../util/NNFuncs.hpp" #include "../util/NNLayer.hpp" #include "../../data/FluidDataSet.hpp" #include "../../data/FluidIndex.hpp" +#include "../../data/FluidMemory.hpp" #include "../../data/FluidTensor.hpp" #include "../../data/TensorTypes.hpp" -#include "../../data/FluidMemory.hpp" #include #include @@ -30,9 +31,8 @@ class MLP using ArrayXXd = Eigen::ArrayXXd; public: - void init(index inputSize, index outputSize, - FluidTensor hiddenSizes, index hiddenAct, index outputAct) + FluidTensor hiddenSizes, index hiddenAct, index outputAct, index seed) { mLayers.clear(); std::vector sizes = {inputSize}; @@ -50,7 +50,7 @@ class MLP mLayers.push_back(NNLayer(sizes[asUnsigned(i)], sizes[asUnsigned(i + 1)], activations[asUnsigned(i)])); } - for (auto&& l : mLayers) l.init(); + for (auto&& l : mLayers) l.init(seed); mInitialized = true; mTrained = false; } @@ -77,7 +77,6 @@ class MLP void clear() { - for (auto&& l : mLayers) l.init(); mInitialized = false; mTrained = false; } @@ -218,6 +217,7 @@ class MLP bool mInitialized{false}; bool mTrained{false}; index mMaxLayerSize; + RandomSeed mSeed; }; } // namespace algorithm } // namespace fluid diff --git a/include/flucoma/algorithms/public/SGD.hpp b/include/flucoma/algorithms/public/SGD.hpp index feec7de7..b1c5c4f9 100644 --- a/include/flucoma/algorithms/public/SGD.hpp +++ b/include/flucoma/algorithms/public/SGD.hpp @@ -32,10 +32,10 @@ class SGD double train(MLP& model, InputRealMatrixView in, RealMatrixView out, index nIter, index batchSize, double learningRate, - double momentum, double valFrac) + double momentum, 
double valFrac, index seed) { return train(model, in, out, - SimpleDataSampler(in.rows(), batchSize, valFrac, true), nIter, + SimpleDataSampler(in.rows(), batchSize, valFrac, true, seed), nIter, learningRate, momentum); } @@ -47,6 +47,7 @@ class SGD using namespace _impl; using namespace std; using namespace Eigen; + MLP originalModel(model); index nExamples = in.rows(); // index inputSize = in.cols(); index outputSize = out.cols(); @@ -104,7 +105,9 @@ class SGD bool isNan = !((finalPred == finalPred)).all(); if (isNan) { - model.clear(); + using std::swap; + //just return model to exactly its pre-call state + swap(model,originalModel); return -1; } error = model.loss(finalPred, output); diff --git a/include/flucoma/algorithms/util/NNLayer.hpp b/include/flucoma/algorithms/util/NNLayer.hpp index 25443e00..14bcb03b 100644 --- a/include/flucoma/algorithms/util/NNLayer.hpp +++ b/include/flucoma/algorithms/util/NNLayer.hpp @@ -11,6 +11,7 @@ under the European Union’s Horizon 2020 research and innovation programme #pragma once #include "NNFuncs.hpp" +#include "../util/EigenRandom.hpp" #include "../../data/FluidIndex.hpp" #include "../../data/FluidMemory.hpp" #include "../../data/TensorTypes.hpp" @@ -46,10 +47,11 @@ class NNLayer initGrads(); } - void init() + void init(RandomSeed seed) { double dev = std::sqrt(6.0 / (mWeights.rows() + mWeights.cols())); - mWeights = dev * MatrixXd::Random(mWeights.rows(), mWeights.cols()).array(); + mWeights = EigenRandom(mWeights.rows(), mWeights.cols(), seed, + Range{-dev, dev}); mBiases = VectorXd::Zero(mWeights.cols()); initGrads(); } diff --git a/include/flucoma/clients/nrt/MLPClassifierClient.hpp b/include/flucoma/clients/nrt/MLPClassifierClient.hpp index 5965acb1..b2467c68 100644 --- a/include/flucoma/clients/nrt/MLPClassifierClient.hpp +++ b/include/flucoma/clients/nrt/MLPClassifierClient.hpp @@ -31,7 +31,7 @@ struct MLPClassifierData index dims() const { return mlp.dims(); } void clear() { - mlp.clear(); + mlp.clear(); 
encoder.clear(); } bool initialized() const { return mlp.initialized(); } @@ -66,7 +66,8 @@ constexpr auto MLPClassifierParams = defineParameters( FloatParam("learnRate", "Learning Rate", 0.01, Min(0.0), Max(1.0)), FloatParam("momentum", "Momentum", 0.5, Min(0.0), Max(0.99)), LongParam("batchSize", "Batch Size", 50, Min(1)), - FloatParam("validation", "Validation Amount", 0.2, Min(0), Max(0.9))); + FloatParam("validation", "Validation Amount", 0.2, Min(0), Max(0.9)), + LongParam("seed", "Random Seed", -1)); class MLPClassifierClient : public FluidBaseClient, @@ -83,7 +84,8 @@ class MLPClassifierClient : public FluidBaseClient, kRate, kMomentum, kBatchSize, - kVal + kVal, + kRandomSeed }; public: @@ -161,9 +163,10 @@ class MLPClassifierClient : public FluidBaseClient, { mAlgorithm.mlp.init(sourceDataSet.pointSize(), mAlgorithm.encoder.numLabels(), get(), - get(), 1); // sigmoid output + get(), 1, + get()); // sigmoid output } - + if (auto missingIDs = sourceDataSet.checkIDs(targetDataSet); missingIDs.size() == 0) { diff --git a/include/flucoma/clients/nrt/MLPRegressorClient.hpp b/include/flucoma/clients/nrt/MLPRegressorClient.hpp index 0aa22ecd..2db7098e 100644 --- a/include/flucoma/clients/nrt/MLPRegressorClient.hpp +++ b/include/flucoma/clients/nrt/MLPRegressorClient.hpp @@ -37,7 +37,8 @@ constexpr auto MLPRegressorParams = defineParameters( FloatParam("learnRate", "Learning Rate", 0.01, Min(0.0), Max(1.0)), FloatParam("momentum", "Momentum", 0.9, Min(0.0), Max(0.99)), LongParam("batchSize", "Batch Size", 50, Min(1)), - FloatParam("validation", "Validation Amount", 0.2, Min(0), Max(0.9))); + FloatParam("validation", "Validation Amount", 0.2, Min(0), Max(0.9)), + LongParam("seed","Random Seed", -1)); class MLPRegressorClient : public FluidBaseClient, OfflineIn, @@ -57,7 +58,8 @@ class MLPRegressorClient : public FluidBaseClient, kRate, kMomentum, kBatchSize, - kVal + kVal, + kRandomSeed }; public: @@ -120,7 +122,8 @@ class MLPRegressorClient : public 
FluidBaseClient, { mAlgorithm.init(sourceDataSet.pointSize(), targetDataSet.pointSize(), - get(), get(), outputAct); + get(), get(), outputAct, + get()); } if (auto missingIDs = sourceDataSet.checkIDs(targetDataSet); diff --git a/include/flucoma/data/FluidDataSetSampler.hpp b/include/flucoma/data/FluidDataSetSampler.hpp index 492ac9c0..f9aac453 100644 --- a/include/flucoma/data/FluidDataSetSampler.hpp +++ b/include/flucoma/data/FluidDataSetSampler.hpp @@ -46,9 +46,9 @@ class FluidDataSetSampler : public detail::DataSampler public: template FluidDataSetSampler(DataSetA const& in, DataSetB const& out, index batchSize, - double validationFraction, bool shuffle = true) + double validationFraction, bool shuffle = true, index seed = -1) : detail::DataSampler(in.size(), batchSize, - validationFraction, shuffle), + validationFraction, shuffle, seed), mIdxMaps{in.indexMap(out)} {} }; diff --git a/include/flucoma/data/FluidJSON.hpp b/include/flucoma/data/FluidJSON.hpp index 02943282..d2a3df7b 100644 --- a/include/flucoma/data/FluidJSON.hpp +++ b/include/flucoma/data/FluidJSON.hpp @@ -425,7 +425,7 @@ void from_json(const nlohmann::json &j, MLP &mlp) { hiddenSizes(i) = j["layers"][asUnsigned(i)]["cols"].get(); } } - mlp.init(inputSize,outputSize, hiddenSizes, activation, finalActivation); + mlp.init(inputSize,outputSize, hiddenSizes, activation, finalActivation, -1);//FIXME why do we need this line?
for (index i = 0; i < nLayers; i++){ auto l = j["layers"][asUnsigned(i)]; index rows = l["rows"].get(); diff --git a/include/flucoma/data/SimpleDataSampler.hpp b/include/flucoma/data/SimpleDataSampler.hpp index e09558ee..6bfad27c 100644 --- a/include/flucoma/data/SimpleDataSampler.hpp +++ b/include/flucoma/data/SimpleDataSampler.hpp @@ -37,9 +37,9 @@ class SimpleDataSampler : public detail::DataSampler public: SimpleDataSampler(index size, index batchSize, double validationFraction, - bool shuffle) + bool shuffle, index seed) : detail::DataSampler(size, batchSize, - validationFraction, shuffle) + validationFraction, shuffle, seed) {} }; diff --git a/include/flucoma/data/detail/DataSampler.hpp b/include/flucoma/data/detail/DataSampler.hpp index 9dd4dec9..7d391ecd 100644 --- a/include/flucoma/data/detail/DataSampler.hpp +++ b/include/flucoma/data/detail/DataSampler.hpp @@ -57,9 +57,9 @@ class DataSampler }; bool mShuffle; - index mTrainCount; - std::random_device mRnd; - std::mt19937 mGen{mRnd()}; + index mSeed; + index mTrainCount; + std::mt19937 mGen; std::vector mIdx; index mBatchSize; FluidTensor mBatch; @@ -78,10 +78,11 @@ class DataSampler protected: DataSampler(index size, index batchSize, double validationFraction, - bool shuffle) - : mShuffle{shuffle}, + bool shuffle, index seed) + : mShuffle{shuffle}, mSeed{seed}, mTrainCount{ std::lrint((1 - std::clamp(validationFraction, 0.0, 1.0)) * size)}, + mGen(static_cast(seed > 0 ? 
seed : std::random_device()())), mIdx(makeIndex(size, mShuffle)), mBatchSize{std::min(mTrainCount, batchSize)}, mBatch(batchSize + (mTrainCount % mBatchSize), 2), @@ -90,12 +91,9 @@ class DataSampler public: void reset() { - using std::begin, std::end; - + if (mSeed > 0) mGen.seed(asUnsigned(mSeed)); mBatchCount = 0; - if (mShuffle) - std::shuffle(begin(mIdx), begin(mIdx) + mTrainCount, - mGen); // preserve validation set + mIdx = makeIndex(mIdx.size(), mShuffle); } // Returns in / out indices for this batch (not the data) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 74ec9efa..21ff4007 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -117,6 +117,9 @@ add_test_executable(TestTransientSlice algorithms/public/TestTransientSlice.cpp) add_test_executable(TestMLP algorithms/public/TestMLP.cpp) add_test_executable(TestKMeans algorithms/public/TestKMeans.cpp) +add_test_executable(TestDataSampler data/detail/TestDataSampler.cpp) +add_test_executable(TestSGD algorithms/public/TestSGD.cpp) + target_link_libraries(TestNoveltySeg PRIVATE TestSignals) target_link_libraries(TestOnsetSeg PRIVATE TestSignals) target_link_libraries(TestEnvelopeSeg PRIVATE TestSignals) @@ -154,4 +157,7 @@ catch_discover_tests(TestMLP WORKING_DIRECTORY "${CMAKE_BINARY_DIR}") catch_discover_tests(TestKMeans WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) catch_discover_tests(TestEigenRandom WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) +catch_discover_tests(TestDataSampler) +catch_discover_tests(TestSGD) + add_compile_tests("FluidTensor Compilation Tests" data/compile_tests/TestFluidTensor_Compile.cpp) diff --git a/tests/algorithms/public/TestMLP.cpp b/tests/algorithms/public/TestMLP.cpp index 8ddc5e87..cbfe338a 100644 --- a/tests/algorithms/public/TestMLP.cpp +++ b/tests/algorithms/public/TestMLP.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace fluid::algorithm { @@ -106,7 +107,7 @@ TEST_CASE("MLP works on precomputed example") // Make a network and set 
initial conditions MLP mlp = MLP(); index act = static_cast(NNActivations::Activation::kSigmoid); - mlp.init(3, 1, {2}, act, act); + mlp.init(3, 1, {2}, act, act,-1); FluidTensor layer0Coeffs = {{0.1, 0.2}, {0.3, 0.1}, {0.5, 0}}; FluidTensor layer1Coeffs = {{0.1}, {0.2}}; FluidTensor layer0Bias = {0.1, 0.1}; @@ -117,7 +118,7 @@ TEST_CASE("MLP works on precomputed example") // train for a single iteration SGD sgd; - sgd.train(mlp, x, y, 1, 1, 0.1, 0.0, 0.0); + sgd.train(mlp, x, y, 1, 1, 0.1, 0.0, 0.0,-1); // get our hand computed data auto [W1, W2, b1, b2] = manual(0.0, 0.1); @@ -221,7 +222,6 @@ TEST_CASE("Test batch loader for mismatched fluid datasets") index i = 0; for (auto batch : ds) { - std::cout << "ping\n"; index expectedSize = i++ == 0 ? batchSize + (N % batchSize) : batchSize; CHECK(batch->rows() == expectedSize); auto inputidx = batch->col(0); @@ -233,5 +233,37 @@ TEST_CASE("Test batch loader for mismatched fluid datasets") } } +TEST_CASE("MLP does repeatable things with manually set seed") +{ + using Tensor = FluidTensor; + using Vector = FluidTensor; + + std::vector weights(5, Tensor(3,3)); + Vector biases(3); + index dummyActivation; + + MLP model; + + //same seed should give same result, different seed should give different result + model.init(3, 3,FluidTensor{3},0,0,42); + model.getParameters(0, weights[0],biases,dummyActivation); + model.init(3, 3,FluidTensor{3},0,0,42); + model.getParameters(0, weights[1],biases,dummyActivation); + model.init(3, 3,FluidTensor{3},0,0,2875); + model.getParameters(0, weights[2],biases,dummyActivation); + + //automatic seeding should give different successive results + model.init(3, 3,FluidTensor{3},0,0,-1); + model.getParameters(0, weights[3],biases,dummyActivation); + model.init(3, 3,FluidTensor{3},0,0,-1); + model.getParameters(0, weights[4],biases,dummyActivation); + + //only weights are stochastic + using Catch::Matchers::RangeEquals; + + REQUIRE_THAT(weights[1], RangeEquals(weights[0])); +
REQUIRE_THAT(weights[1], !RangeEquals(weights[2])); + REQUIRE_THAT(weights[3], !RangeEquals(weights[4])); +} } // namespace fluid::algorithm \ No newline at end of file diff --git a/tests/algorithms/public/TestSGD.cpp b/tests/algorithms/public/TestSGD.cpp new file mode 100644 index 00000000..b47fc87e --- /dev/null +++ b/tests/algorithms/public/TestSGD.cpp @@ -0,0 +1,97 @@ +#define CATCH_CONFIG_MAIN + +#include +#include +#include +#include +#include +#include +#include + +namespace fluid::algorithm { + +using Tensor = FluidTensor; + + +index N = 64; +index nIter = 1; +index batchSize = N; +double learnRate = 0.1; +double momentum = 0.0; +double valFrac = 0.0; + + +TEST_CASE("SGD is repeatable with manually set seed") +{ + + std::vector models(3, MLP()); + SGD algo; + + + Tensor input(64, 1); + std::iota(input.begin(), input.end(), 0.0); + Tensor output(input); + + models[0].init(1, 1, FluidTensor{2}, 0, 0, -1); + models[1] = models[0]; + models[2] = models[1]; + + double error = algo.train(models[0], input, output, 1, batchSize / 2, + learnRate, momentum, valFrac, 42); + REQUIRE_FALSE(error == -1); + error = algo.train(models[1], input, output, 1, batchSize / 2, learnRate, + momentum, valFrac, 42); + REQUIRE_FALSE(error == -1); + error = algo.train(models[2], input, output, 1, batchSize / 2, learnRate, + momentum, valFrac, 28976); + REQUIRE_FALSE(error == -1); + + + std::vector weights(3, Tensor(1, 2)); + std::vector biases(3, FluidTensor(2)); + std::vector activations(3); + + models[0].getParameters(0, weights[0], biases[0], activations[0]); + models[1].getParameters(0, weights[1], biases[1], activations[1]); + models[2].getParameters(0, weights[2], biases[2], activations[2]); + + using Catch::Matchers::RangeEquals; + // only weights are stochastic + REQUIRE_THAT(weights[1], RangeEquals(weights[0])); + REQUIRE_THAT(weights[1], !RangeEquals(weights[2])); +} + +TEST_CASE("Failed training doesn't mutate model") +{ + + MLP model; + SGD algo; + + + Tensor input(64, 1); 
+ Tensor output(64, 1); + input.fill(0); + // adding a NaN to fail training + input(31, 0) = std::numeric_limits::quiet_NaN(); + + std::vector weights(2, Tensor(1, 2)); + std::vector biases(2, FluidTensor(2)); + std::vector activations(2); + + model.init(1, 1, FluidTensor{2}, 0, 0, -1); + + model.getParameters(0, weights[0], biases[0], activations[0]); + + double error = algo.train(model, input, output, nIter, batchSize, learnRate, + momentum, valFrac, -1); + REQUIRE(error == -1); + model.getParameters(0, weights[1], biases[1], activations[1]); + + using Catch::Matchers::RangeEquals; + REQUIRE_THAT(weights[1], RangeEquals(weights[0])); + REQUIRE_THAT(biases[1], RangeEquals(biases[0])); + REQUIRE(activations[1] == activations[0]); +} + + +} // namespace fluid::algorithm \ No newline at end of file diff --git a/tests/data/detail/TestDataSampler.cpp b/tests/data/detail/TestDataSampler.cpp new file mode 100644 index 00000000..eb03a3f6 --- /dev/null +++ b/tests/data/detail/TestDataSampler.cpp @@ -0,0 +1,62 @@ +#define CATCH_CONFIG_MAIN + +#include +#include +#include +#include +#include + +namespace fluid::detail { + +TEST_CASE("DataSampler gives reproduceable results with manually set seed") +{ + + using Tensor = FluidTensor; + + index N = 64; + + std::vector train(6, Tensor(N / 2, 2)); + std::vector val(6, Tensor(N / 2, 2)); + + SimpleDataSampler d(N, N, 0.5, true, 42); + train[0] <<= *d.nextBatch(); + val[0] <<= *d.validationSet(); + + using Catch::Matchers::RangeEquals; + SECTION("reset() is repeatable with random seed") + { + d.reset(); + train[1] <<= *d.nextBatch(); + val[1] <<= *d.validationSet(); + REQUIRE_THAT(train[1], RangeEquals(train[0])); + REQUIRE_THAT(val[1], RangeEquals(val[0])); + } + SECTION("new instance with same seed is repeaable") + { + d = SimpleDataSampler(N, N, 0.5, true, 42); + train[2] <<= *d.nextBatch(); + val[2] <<= *d.validationSet(); + REQUIRE_THAT(train[2], RangeEquals(train[0])); + REQUIRE_THAT(val[2], RangeEquals(val[0])); + } + 
SECTION("different seed gives different result") + { + d = SimpleDataSampler(N, N, 0.5, true, 23498); + train[3] <<= *d.nextBatch(); + val[3] <<= *d.validationSet(); + REQUIRE_THAT(train[3], !RangeEquals(train[0])); + REQUIRE_THAT(val[3], !RangeEquals(val[0])); + } + SECTION("automatic seeding gives different results") + { + d = SimpleDataSampler(N, N, 0.5, true, -1); + train[4] <<= *d.nextBatch(); + val[4] <<= *d.validationSet(); + d = SimpleDataSampler(N, N, 0.5, true, -1); + train[5] <<= *d.nextBatch(); + val[5] <<= *d.validationSet(); + REQUIRE_THAT(train[5], !RangeEquals(train[4])); + REQUIRE_THAT(val[5], !RangeEquals(val[4])); + } +} +} // namespace fluid::detail \ No newline at end of file