From 80a60d1740882c9304372615e747f4c9b6c63fd7 Mon Sep 17 00:00:00 2001 From: Robert Wojciechowski Date: Thu, 13 Nov 2025 14:09:50 +0100 Subject: [PATCH 1/6] FileStream class with LinesRange accessor --- .../complianceengine/src/lib/CMakeLists.txt | 1 + .../src/lib/DistributionInfo.cpp | 1 + .../src/lib/procedures/FileRegexMatch.cpp | 34 +++++++------ .../src/lib/procedures/PackageInstalled.cpp | 50 +++++++++++-------- 4 files changed, 48 insertions(+), 38 deletions(-) diff --git a/src/modules/complianceengine/src/lib/CMakeLists.txt b/src/modules/complianceengine/src/lib/CMakeLists.txt index 6e2f9b50f8..16ecc18756 100644 --- a/src/modules/complianceengine/src/lib/CMakeLists.txt +++ b/src/modules/complianceengine/src/lib/CMakeLists.txt @@ -138,6 +138,7 @@ add_library(complianceenginelib STATIC DistributionInfo.cpp Engine.cpp Evaluator.cpp + FileStream.cpp FileTreeWalk.cpp FilesystemScanner.cpp GroupsIterator.cpp diff --git a/src/modules/complianceengine/src/lib/DistributionInfo.cpp b/src/modules/complianceengine/src/lib/DistributionInfo.cpp index 4ad634cfce..3eaec88123 100644 --- a/src/modules/complianceengine/src/lib/DistributionInfo.cpp +++ b/src/modules/complianceengine/src/lib/DistributionInfo.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp b/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp index e4fd64235c..c3782e9641 100644 --- a/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp +++ b/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp @@ -3,16 +3,22 @@ #include #include #include +#include #include #include #include #include #include -#include + +#define AssertResult(variable, ...) \ + if (!(variable).HasValue()) \ + { \ + OsConfigLogError(context.GetLogHandle(), __VA_ARGS__); \ + return (variable).Error(); \ + } namespace ComplianceEngine { -using std::ifstream; using std::string; using std::regex_constants::syntax_option_type; namespace @@ -33,11 +39,9 @@ Result MultilineMatch(const std::string& filename, const string& matchPatt Optional matchRegex; Optional stateRegex; - ifstream input(filename); - if (!input.is_open()) - { - return Error("Failed to open file: " + filename, errno); - } + auto input = FileStream::Open(filename, context); + AssertResult(input, "Failed to open '%s'", filename.c_str()); + try { matchRegex = regex(matchPattern, syntaxOptions.first); @@ -53,18 +57,16 @@ Result MultilineMatch(const std::string& filename, const string& matchPatt } int lineNumber = 0; - - string line; - // Special case for empty files, read empty line then - while (getline(input, line) || lineNumber == 0) + for (auto line : input->Lines()) { + AssertResult(line, "Failed to read '%s'", filename.c_str()); lineNumber++; - OsConfigLogDebug(context.GetLogHandle(), "Matching line %d: '%s', pattern: '%s'", lineNumber, line.c_str(), matchPattern.c_str()); + OsConfigLogDebug(context.GetLogHandle(), "Matching line %d: '%s', pattern: '%s'", lineNumber, line->c_str(), matchPattern.c_str()); smatch match; - if (regex_search(line, match, matchRegex.Value())) + if (regex_search(line.Value(), match, matchRegex.Value())) { - OsConfigLogDebug(context.GetLogHandle(), "Matched line %d: %s", lineNumber, line.c_str()); + OsConfigLogDebug(context.GetLogHandle(), "Matched line %d: %s", lineNumber, line->c_str()); if (stateRegex.HasValue()) { assert(match.ready()); @@ -73,13 +75,13 @@ Result MultilineMatch(const std::string& filename, const string& matchPatt OsConfigLogDebug(context.GetLogHandle(), "Value to match: %s", valueToMatch.c_str()); if (regex_search(valueToMatch, stateRegex.Value())) { - OsConfigLogDebug(context.GetLogHandle(), "Matched line %d: %s", lineNumber, line.c_str()); + OsConfigLogDebug(context.GetLogHandle(), "Matched line %d: %s", lineNumber, line->c_str()); return true; } } else { - OsConfigLogDebug(context.GetLogHandle(), "Matched line %d: %s", lineNumber, line.c_str()); + OsConfigLogDebug(context.GetLogHandle(), "Matched line %d: %s", lineNumber, line->c_str()); return true; } } diff --git a/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp b/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp index 37ea169070..0976cba4d9 100644 --- a/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp +++ b/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -37,10 +38,12 @@ class ScopeGuard template ScopeGuard(Callable&& f) : f(std::forward(f)){}; + void Deactivate() { active = false; } + ~ScopeGuard() { if (active) @@ -76,32 +79,34 @@ Result DetectPackageManager(ContextInterface& context) return Error("No package manager found", ENOENT); } -Result LoadPackageCache(const std::string& path) +Result LoadPackageCache(const std::string& path, ContextInterface& context) { PackageCache cache; const std::string pkgCacheHeader = "# PackageCache "; // "# PackageCache @\n" cache.lastUpdateTime = 0; cache.packageManager = PackageManagerType::Autodetect; cache.packages.clear(); - std::ifstream cacheFile(path); - if (!cacheFile.is_open()) + auto cacheFileResult = ComplianceEngine::FileStream::Open(path, context); + if (!cacheFileResult.HasValue()) { - return Error("Failed to open cache file: " + path); + return cacheFileResult.Error(); } + auto cacheFile = std::move(cacheFileResult.Value()); - std::string header; - if (!std::getline(cacheFile, header) || (0 != header.find(pkgCacheHeader, 0))) + auto header = cacheFile.ReadLine(); + if (!header.HasValue()) { - return Error("Invalid cache file format"); + OsConfigLogError(context.GetLogHandle(), "Failed to read '%s'", path.c_str()); + return header.Error(); } - auto separatorPos = header.find('@'); + auto separatorPos = header->find('@'); if (std::string::npos == separatorPos) { return Error("Invalid cache file header format"); } - const auto packageMangerStr = header.substr(pkgCacheHeader.length(), separatorPos - pkgCacheHeader.length()); + const auto packageMangerStr = header->substr(pkgCacheHeader.length(), separatorPos - pkgCacheHeader.length()); if (packageMangerStr == "dpkg") cache.packageManager = PackageManagerType::DPKG; else if (packageMangerStr == "rpm") @@ -110,34 +115,35 @@ Result LoadPackageCache(const std::string& path) return Error("Invalid package manager type"); try { - cache.lastUpdateTime = std::stol(header.substr(separatorPos + 1)); + cache.lastUpdateTime = std::stol(header->substr(separatorPos + 1)); } catch (const std::exception&) { return Error("Invalid timestamp in cache file header"); } - std::string line; - while (std::getline(cacheFile, line)) + while (!cacheFile.AtEnd()) { - if (!line.empty()) + auto line = cacheFile.ReadLine(); + if (!line) { - auto sepPos = line.find(' '); + OsConfigLogError(context.GetLogHandle(), "%s", line.Error().message.c_str()); + return line.Error(); + } + + if (!line->empty()) + { + auto sepPos = line->find(' '); if (sepPos == std::string::npos) { continue; // Skip lines without a space } - std::string packageName = line.substr(0, sepPos); - std::string packageVersion = line.substr(sepPos + 1); + std::string packageName = line->substr(0, sepPos); + std::string packageVersion = line->substr(sepPos + 1); cache.packages[packageName] = packageVersion; } } - if (cacheFile.bad()) - { - return Error("Error reading cache file"); - } - return cache; } @@ -454,7 +460,7 @@ Result AuditPackageInstalled(const PackageInstalledParams& params, Indic auto log = context.GetLogHandle(); PackageCache cache; - auto cacheResult = LoadPackageCache(params.test_cachePath.Value()); + auto cacheResult = LoadPackageCache(params.test_cachePath.Value(), context); bool cacheValid = true; bool cacheStale = false; if (cacheResult.HasValue()) From 01857f937ecda8d112267e2c74cf23b35250f1dd Mon Sep 17 00:00:00 2001 From: Robert Wojciechowski Date: Fri, 14 Nov 2025 12:51:58 +0100 Subject: [PATCH 2/6] Add tests for line reading --- .../complianceengine/src/lib/CMakeLists.txt | 2 +- .../src/lib/DistributionInfo.cpp | 2 +- .../complianceengine/src/lib/Evaluator.h | 7 + .../complianceengine/src/lib/InputStream.cpp | 271 ++++++++++++++++++ .../complianceengine/src/lib/InputStream.h | 134 +++++++++ .../src/lib/procedures/FileRegexMatch.cpp | 11 +- .../src/lib/procedures/PackageInstalled.cpp | 6 +- .../complianceengine/tests/CMakeLists.txt | 1 + .../tests/InputStreamTest.cpp | 270 +++++++++++++++++ 9 files changed, 690 insertions(+), 14 deletions(-) create mode 100644 src/modules/complianceengine/src/lib/InputStream.cpp create mode 100644 src/modules/complianceengine/src/lib/InputStream.h create mode 100644 src/modules/complianceengine/tests/InputStreamTest.cpp diff --git a/src/modules/complianceengine/src/lib/CMakeLists.txt b/src/modules/complianceengine/src/lib/CMakeLists.txt index 16ecc18756..33a01d1c3c 100644 --- a/src/modules/complianceengine/src/lib/CMakeLists.txt +++ b/src/modules/complianceengine/src/lib/CMakeLists.txt @@ -138,7 +138,7 @@ add_library(complianceenginelib STATIC DistributionInfo.cpp Engine.cpp Evaluator.cpp - FileStream.cpp + InputStream.cpp FileTreeWalk.cpp FilesystemScanner.cpp GroupsIterator.cpp diff --git a/src/modules/complianceengine/src/lib/DistributionInfo.cpp b/src/modules/complianceengine/src/lib/DistributionInfo.cpp index 3eaec88123..f074533e8e 100644 --- a/src/modules/complianceengine/src/lib/DistributionInfo.cpp +++ b/src/modules/complianceengine/src/lib/DistributionInfo.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/modules/complianceengine/src/lib/Evaluator.h b/src/modules/complianceengine/src/lib/Evaluator.h index 4bf8db09c2..86b0a32042 100644 --- a/src/modules/complianceengine/src/lib/Evaluator.h +++ b/src/modules/complianceengine/src/lib/Evaluator.h @@ -15,6 +15,13 @@ #include #include +#define AssertResult(variable, fmt, ...) \ + if (!(variable).HasValue()) \ + { \ + OsConfigLogError(context.GetLogHandle(), fmt, __VA_ARGS__); \ + return (variable).Error(); \ + } + struct json_object_t; struct json_value_t; diff --git a/src/modules/complianceengine/src/lib/InputStream.cpp b/src/modules/complianceengine/src/lib/InputStream.cpp new file mode 100644 index 0000000000..75322c42a5 --- /dev/null +++ b/src/modules/complianceengine/src/lib/InputStream.cpp @@ -0,0 +1,271 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include +#include +#include +#include + +namespace ComplianceEngine +{ +using std::ifstream; +using std::string; + +InputStream::InputStream(string fileName, ContextInterface& context) + : mContext(context), + mFileName(std::move(fileName)) +{ + assert(!mStream.is_open()); +} + +InputStream::InputStream(InputStream&& other) noexcept + : mContext(other.mContext), + mFileName(std::move(other.mFileName)), + mStream(std::move(other.mStream)) +{ + assert(mStream.is_open()); +} + +InputStream& InputStream::operator=(InputStream&& other) noexcept +{ + if (this == &other) + return *this; + + // The context reference must be constant for moves to work and it's global anyway + assert(&mContext == &other.mContext); + mFileName = std::move(other.mFileName); + mStream = std::move(other.mStream); + assert(mStream.is_open()); + return *this; +} + +Result InputStream::Open(const string& fileName, ContextInterface& context) +{ + // access lets us determine readability and obtain error codes + if (0 != ::access(fileName.c_str(), R_OK)) + { + const auto status = errno; + OsConfigLogInfo(context.GetLogHandle(), "Failed to access '%s': %s (%d)", fileName.c_str(), strerror(status), status); + return Error(string("failed to access '") + fileName + "': " + strerror(status), status); + } + + InputStream result(fileName, context); + result.mStream.open(context.GetSpecialFilePath(fileName)); + if (!result.mStream.is_open()) + { + OsConfigLogInfo(context.GetLogHandle(), "Failed to open '%s'", result.mFileName.c_str()); + return Error(string("failed to open '") + result.mFileName + "'"); + } + + assert(result.mStream.is_open()); + return result; +} + +Result InputStream::ReadLine() +{ + assert(mStream.is_open()); + if (mBytesRead >= cMaxReadSize) + { + OsConfigLogError(mContext.GetLogHandle(), "Maximum file '%s' read size reached", mFileName.c_str()); + return Error("maximum file '" + mFileName + "' read size reached", E2BIG); + } + + // We want to always check with Good() before reading + if (mStream.eof()) + { + OsConfigLogError(mContext.GetLogHandle(), "Attempted to read file '%s' after EOF", mFileName.c_str()); + return Error(string("attempted to read file '") + mFileName + "' after EOF", EBADFD); + } + + // We won't return empty strings in case an error happened previously + if (mStream.fail() || mStream.bad()) + { + OsConfigLogError(mContext.GetLogHandle(), "Attempted to read file '%s' after failure", mFileName.c_str()); + return Error(string("attempted to read file '") + mFileName + "' after failure", EBADFD); + } + // eof(), fail(), bad() and limits are already checked + assert(Good()); + + // Read the data + string line; + std::getline(mStream, line); + + // Include the line size without the newline character + mBytesRead += line.size(); + if (!mStream.eof()) + { + // Here we know a newline has been parsed. + ++mBytesRead; + } + + if (mStream.bad()) + { + // fail() may return true here in case there's no trailing newline character, so we stick to bad(). + OsConfigLogError(mContext.GetLogHandle(), "Failed to read line from '%s': %d", mFileName.c_str(), static_cast(mStream.rdstate())); + return Error("failed to read line from '" + mFileName + "'", EBADFD); + } + + return Result(std::move(line)); +} + +bool InputStream::Good() const +{ + assert(mStream.is_open()); + return mBytesRead < cMaxReadSize && mStream.good(); +} + +bool InputStream::AtEnd() const +{ + assert(mStream.is_open()); + return mStream.eof(); +} + +const string& InputStream::GetFileName() const +{ + assert(mStream.is_open()); + return mFileName; +} + +LinesRange InputStream::Lines() & +{ + return LinesRange(*this); +} + +size_t InputStream::BytesRead() const +{ + return mBytesRead; +} + +LinesIterator::LinesIterator(InputStream& stream) + : mStream(stream) +{ +} + +LinesIterator::LinesIterator(const LinesIterator& other) + : mStream(other.mStream), + mValue(other.mValue) +{ +} + +LinesIterator::LinesIterator(LinesIterator&& other) noexcept + : mStream(other.mStream), + mValue(std::move(other.mValue)) +{ +} + +LinesIterator& LinesIterator::operator=(const LinesIterator& other) +{ + if (this == &other) + return *this; + + assert(&mStream == &other.mStream); + mValue = other.mValue; + return *this; +} + +LinesIterator& LinesIterator::operator=(LinesIterator&& other) noexcept +{ + if (this == &other) + return *this; + + assert(&mStream == &other.mStream); + mValue = std::move(other.mValue); + return *this; +} + +LinesIterator& LinesIterator::operator++() +{ + if (mStream.Good()) + mValue = mStream.ReadLine(); + else + mValue.Reset(); + return *this; +} + +LinesIterator LinesIterator::operator++(int) +{ + LinesIterator tmp = *this; + ++*this; + return tmp; +} + +Result LinesIterator::operator*() const& noexcept(false) +{ + CheckValue(); + return mValue.Value(); +} + +Result LinesIterator::operator*() && noexcept(false) +{ + CheckValue(); + return std::move(mValue.Value()); +} + +const Result* LinesIterator::operator->() const noexcept(false) +{ + CheckValue(); + return &mValue.Value(); +} + +bool LinesIterator::IsEnd() const +{ + return mValue.HasValue(); +} + +void LinesIterator::CheckValue() const noexcept(false) +{ + if (!IsEnd()) + { + throw std::logic_error("LinesIterator: unchecked access to Value"); + } +} + +// The comparison makes only sense to compare with the end() iterator +bool LinesIterator::operator==(const LinesIterator& other) const +{ + return mValue.HasValue() == other.mValue.HasValue(); +} + +bool LinesIterator::operator!=(const LinesIterator& other) const +{ + return !(*this == other); +} + +LinesRange::LinesRange(InputStream& stream) + : mStream(stream) +{ +} + +LinesRange::LinesRange(const LinesRange& other) + : mStream(other.mStream) +{ +} + +LinesRange::LinesRange(LinesRange&& other) noexcept + : mStream(other.mStream) +{ +} + +LinesRange& LinesRange::operator=(const LinesRange& other) +{ + assert(&mStream == &other.mStream); + return *this; +} + +LinesRange& LinesRange::operator=(LinesRange&& other) noexcept +{ + assert(&mStream == &other.mStream); + return *this; +} + +LinesIterator LinesRange::begin() const +{ + return ++LinesIterator(mStream); +} + +LinesIterator LinesRange::end() const +{ + return LinesIterator(mStream); +} + +} // namespace ComplianceEngine diff --git a/src/modules/complianceengine/src/lib/InputStream.h b/src/modules/complianceengine/src/lib/InputStream.h new file mode 100644 index 0000000000..104b7d8633 --- /dev/null +++ b/src/modules/complianceengine/src/lib/InputStream.h @@ -0,0 +1,134 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#ifndef COMPLIANCEENGINE_FILE_STREAM_H +#define COMPLIANCEENGINE_FILE_STREAM_H + +#include +#include +#include +#include + +namespace ComplianceEngine +{ +class LinesRange; + +// This class wraps the C++ std::ifstream with +// CE-specific error handling and a read size limit. +// +// 1. It uses factory method instead of constructors, to provide error handling with Result. +// 2. It guarantees an instance is always assosciated with an open file. +// 3. Any previous errors cause subsequent reads to fail +// 4. Context is used to provide mocking mechanism - filenames can be overridden for testing. +// 5. Maximum number of bytes to read is limited to cMaxReadSize. This is a soft limit as we allow +// to read last full line using std::getline. +class InputStream +{ +public: + // Maximum number of bytes read from an input stream + static constexpr std::size_t cMaxReadSize = 1024 * 1024 * 128; + + InputStream(const InputStream&) = delete; + InputStream(InputStream&&) noexcept; + InputStream& operator=(const InputStream&) = delete; + InputStream& operator=(InputStream&&) noexcept; + ~InputStream() = default; + + // Opens a file for reading. + static Result Open(const std::string& fileName, ContextInterface& context); + + // Reads a single line. + Result ReadLine(); + + // Returns true in case more bytes can be read from the file. + // This means there was no error returned so far, + // we have not reached end of the file, and we have not reached + // the read size limit. + bool Good() const; + + // Returns true in case we have reached end of the file. + bool AtEnd() const; + + // Returns the file name passed to Open. + // Note: In case of mocking, the underlying filename is not stored. + const std::string& GetFileName() const; + + // Obtains a range for line-by-line range-based iteration. + LinesRange Lines() &; + + // Obtains the number of bytes read so far. + std::size_t BytesRead() const; + +private: + InputStream() = delete; + + explicit InputStream(std::string fileName, ContextInterface& context); + + // Holds logger handle and allows mocking. + ContextInterface& mContext; + + // The file name passed to Open. + std::string mFileName; + + // The underlying stream. + std::ifstream mStream; + + // The number of bytes read so far. + std::size_t mBytesRead = 0; +}; + +// This class allows to iterate a file line by line. +class LinesIterator +{ +public: + LinesIterator(const LinesIterator& other); + LinesIterator(LinesIterator&& other) noexcept; + LinesIterator& operator=(const LinesIterator& other); + LinesIterator& operator=(LinesIterator&& other) noexcept; + ~LinesIterator() = default; + + LinesIterator& operator++(); + LinesIterator operator++(int); + Result operator*() const& noexcept(false); + Result operator*() && noexcept(false); + const Result* operator->() const noexcept(false); + bool operator==(const LinesIterator& other) const; + bool operator!=(const LinesIterator& other) const; + + // Returns true in case this is the end iterator. + bool IsEnd() const; + +private: + friend class LinesRange; + + InputStream& mStream; + // End iterator has nullopt assigned + Optional> mValue = Optional>(); + + // Only range class(es) are able to construct new iterators + explicit LinesIterator(InputStream& stream); + + // Throws in case of end iterator dereference. + void CheckValue() const noexcept(false); +}; + +// This class provides an interface for the range-based for loops. +class LinesRange +{ +public: + explicit LinesRange(InputStream& stream); + LinesRange(const LinesRange& other); + LinesRange(LinesRange&& other) noexcept; + LinesRange& operator=(const LinesRange& other); + LinesRange& operator=(LinesRange&& other) noexcept; + ~LinesRange() = default; + + LinesIterator begin() const; // NOLINT(*-identifier-naming) + LinesIterator end() const; // NOLINT(*-identifier-naming) + +private: + InputStream& mStream; +}; +} // namespace ComplianceEngine + +#endif // COMPLIANCEENGINE_FILE_STREAM_H diff --git a/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp b/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp index c3782e9641..b3da6662db 100644 --- a/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp +++ b/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp @@ -3,20 +3,13 @@ #include #include #include -#include +#include #include #include #include #include #include -#define AssertResult(variable, ...) \ - if (!(variable).HasValue()) \ - { \ - OsConfigLogError(context.GetLogHandle(), __VA_ARGS__); \ - return (variable).Error(); \ - } - namespace ComplianceEngine { using std::string; @@ -39,7 +32,7 @@ Result MultilineMatch(const std::string& filename, const string& matchPatt Optional matchRegex; Optional stateRegex; - auto input = FileStream::Open(filename, context); + auto input = InputStream::Open(filename, context); AssertResult(input, "Failed to open '%s'", filename.c_str()); try diff --git a/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp b/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp index 0976cba4d9..c27bbc1ce4 100644 --- a/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp +++ b/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include @@ -86,7 +86,7 @@ Result LoadPackageCache(const std::string& path, ContextInterface& cache.lastUpdateTime = 0; cache.packageManager = PackageManagerType::Autodetect; cache.packages.clear(); - auto cacheFileResult = ComplianceEngine::FileStream::Open(path, context); + auto cacheFileResult = ComplianceEngine::InputStream::Open(path, context); if (!cacheFileResult.HasValue()) { return cacheFileResult.Error(); @@ -122,7 +122,7 @@ Result LoadPackageCache(const std::string& path, ContextInterface& return Error("Invalid timestamp in cache file header"); } - while (!cacheFile.AtEnd()) + while (cacheFile.Good()) { auto line = cacheFile.ReadLine(); if (!line) diff --git a/src/modules/complianceengine/tests/CMakeLists.txt b/src/modules/complianceengine/tests/CMakeLists.txt index c7a98661a0..c6270abacc 100644 --- a/src/modules/complianceengine/tests/CMakeLists.txt +++ b/src/modules/complianceengine/tests/CMakeLists.txt @@ -58,6 +58,7 @@ add_executable(complianceenginetests BindingsTest.cpp CommonContextTest.cpp ComplianceEngineTest.cpp + InputStreamTest.cpp FilesystemScannerTest.cpp DistributionInfoTest.cpp EngineTest.cpp diff --git a/src/modules/complianceengine/tests/InputStreamTest.cpp b/src/modules/complianceengine/tests/InputStreamTest.cpp new file mode 100644 index 0000000000..f8be42dbde --- /dev/null +++ b/src/modules/complianceengine/tests/InputStreamTest.cpp @@ -0,0 +1,270 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include +#include +#include + +using ComplianceEngine::Error; +using ComplianceEngine::InputStream; +using ComplianceEngine::Result; +using std::string; + +class InputStreamTest : public ::testing::Test +{ +protected: + MockContext mContext; +}; + +TEST_F(InputStreamTest, DoesNotExist) +{ + auto result = InputStream::Open("nonexistentfile", mContext); + ASSERT_FALSE(result.HasValue()); + EXPECT_EQ(result.Error().code, ENOENT); +} + +TEST_F(InputStreamTest, EmptyFile) +{ + const auto filename = mContext.MakeTempfile(""); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + + // Not yet at end as we haven't read anything yet. + EXPECT_TRUE(result->Good()); + EXPECT_FALSE(result->AtEnd()); + + // This should return an empty line + auto line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string()); + + // Subsequent reads should fail as we've reached EOF + EXPECT_FALSE(result->Good()); + EXPECT_TRUE(result->AtEnd()); + line = result->ReadLine(); + ASSERT_FALSE(line.HasValue()); + EXPECT_EQ(line.Error().code, EBADFD); +} + +TEST_F(InputStreamTest, SingleLine) +{ + const auto filename = mContext.MakeTempfile("foo\n"); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + + // Not yet at end as we haven't read anything yet. + EXPECT_TRUE(result->Good()); + EXPECT_FALSE(result->AtEnd()); + + // This should return a line with 'foo' contents + auto line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string("foo")); + + // Not yet at end as we have not reached EOF state yet + EXPECT_TRUE(result->Good()); + EXPECT_FALSE(result->AtEnd()); + + // This should return an empty line + line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string()); + + // Subsequent reads should fail as we've reached EOF + EXPECT_FALSE(result->Good()); + EXPECT_TRUE(result->AtEnd()); + line = result->ReadLine(); + ASSERT_FALSE(line.HasValue()); + EXPECT_EQ(line.Error().code, EBADFD); +} + +TEST_F(InputStreamTest, MultipleLines) +{ + const auto filename = mContext.MakeTempfile("foo \n bar \r\nbaz"); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + + // Not yet at end as we haven't read anything yet. + EXPECT_TRUE(result->Good()); + EXPECT_FALSE(result->AtEnd()); + + // This should return a line with 'foo' contents + auto line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string("foo ")); + + // Not yet at end as we have not reached EOF state yet + EXPECT_TRUE(result->Good()); + EXPECT_FALSE(result->AtEnd()); + + // This should return a line with 'bar' contents. + // It will include the \r as well as it runs on Linux (std::getline behavior) + line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string(" bar \r")); + + // This should return a line with 'baz' contents + line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string("baz")); + + // We've reached the EOF as there's no line ending at the end of input. + EXPECT_FALSE(result->Good()); + EXPECT_TRUE(result->AtEnd()); + line = result->ReadLine(); + ASSERT_FALSE(line.HasValue()); + EXPECT_EQ(line.Error().code, EBADFD); +} + +TEST_F(InputStreamTest, Range_MultipleLines) +{ + const auto filename = mContext.MakeTempfile("foo\nbar\nbaz\n\n"); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + + string test; + int counter = 0; + // Test the LinesRange with iterator for range-based for loops use-case + for (auto line : result->Lines()) + { + ASSERT_TRUE(line.HasValue()); + test += line.Value(); + ++counter; + } + + EXPECT_FALSE(result->Good()); + EXPECT_EQ(test, string("foobarbaz")); + EXPECT_EQ(counter, 5); + EXPECT_FALSE(result->Good()); + EXPECT_TRUE(result->AtEnd()); +} + +TEST_F(InputStreamTest, Mocking) +{ + const auto filename = mContext.MakeTempfile("foo"); + mContext.SetSpecialFilePath("/etc/passwd", filename); + + // The /etc/passwd file should be masked by the tempfile we've just created + auto result = InputStream::Open("/etc/passwd", mContext); + ASSERT_TRUE(result.HasValue()); + auto line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string("foo")); +} + +TEST_F(InputStreamTest, LimitsHandling_1) +{ + const auto input = string(); + const auto filename = mContext.MakeTempfile(input); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + auto line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string()); + EXPECT_EQ(result->BytesRead(), input.size()); +} + +TEST_F(InputStreamTest, LimitsHandling_2) +{ + const auto input = string("foo"); + const auto filename = mContext.MakeTempfile(input); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + auto line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string("foo")); + EXPECT_EQ(result->BytesRead(), input.size()); +} + +TEST_F(InputStreamTest, LimitsHandling_3) +{ + const auto input = string("foo\n"); + const auto filename = mContext.MakeTempfile(input); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + auto line = result->ReadLine(); + ASSERT_TRUE(line.HasValue()); + EXPECT_EQ(line.Value(), string("foo")); + EXPECT_EQ(result->BytesRead(), input.size()); +} + +TEST_F(InputStreamTest, LimitsHandling_4) +{ + const auto input = string("foo\n\nbar"); + const auto filename = mContext.MakeTempfile(input); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + for (auto line : result->Lines()) + { + ASSERT_TRUE(line.HasValue()); + } + EXPECT_EQ(result->BytesRead(), input.size()); +} + +TEST_F(InputStreamTest, LimitsHandling_6) +{ + const auto input = string(InputStream::cMaxReadSize, 'x'); + const auto filename = mContext.MakeTempfile(input); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + std::size_t counter = 0; + for (auto line : result->Lines()) + { + ASSERT_TRUE(line.HasValue()); + ++counter; + } + EXPECT_EQ(counter, 1); + EXPECT_EQ(result->BytesRead(), input.size()); +} + +TEST_F(InputStreamTest, LimitsHandling_7) +{ + constexpr auto limit = InputStream::cMaxReadSize; + string input; + for (std::size_t i = 0; i < 1023; ++i) + { + input += string(InputStream::cMaxReadSize / 1024, 'x'); + input += '\n'; + } + const auto filename = mContext.MakeTempfile(input); + auto result = InputStream::Open(filename, mContext); + ASSERT_TRUE(result.HasValue()); + std::size_t counter = 0; + for (auto line : result->Lines()) + { + ASSERT_TRUE(line.HasValue()); + ++counter; + } + // +1 for the trailing empty line + EXPECT_EQ(counter, 1024); + EXPECT_TRUE(result->BytesRead() < limit); +} + +TEST_F(InputStreamTest, LimitsHandling_8) +{ + constexpr auto limit = InputStream::cMaxReadSize; + string input; + for (std::size_t i = 0; i < 1024; ++i) + { + input += string(InputStream::cMaxReadSize / 1024, 'x'); + input += '\n'; + } + const auto filename = mContext.MakeTempfile(input); + mContext.SetSpecialFilePath("/etc/passwd", filename); + auto result = InputStream::Open("/etc/passwd", mContext); + ASSERT_TRUE(result.HasValue()); + std::size_t counter = 0; + for (auto line : result->Lines()) + { + ASSERT_TRUE(line.HasValue()); + ++counter; + } + EXPECT_EQ(counter, 1024); + // We exceed the limit here, but won't be able to read next time + EXPECT_TRUE(result->BytesRead() > limit); + + // Limit reached + auto line = result->ReadLine(); + ASSERT_FALSE(line.HasValue()); + EXPECT_EQ(line.Error().code, E2BIG); +} From 582a8a1dcd5d735b7bef47ae373a61bdedc4076a Mon Sep 17 00:00:00 2001 From: Robert Wojciechowski Date: Fri, 14 Nov 2025 12:57:39 +0100 Subject: [PATCH 3/6] Nest iterators in a namespace --- src/modules/complianceengine/src/lib/InputStream.cpp | 8 +++++--- src/modules/complianceengine/src/lib/InputStream.h | 8 +++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/modules/complianceengine/src/lib/InputStream.cpp b/src/modules/complianceengine/src/lib/InputStream.cpp index 75322c42a5..da5546f525 100644 --- a/src/modules/complianceengine/src/lib/InputStream.cpp +++ b/src/modules/complianceengine/src/lib/InputStream.cpp @@ -126,9 +126,9 @@ const string& InputStream::GetFileName() const return mFileName; } -LinesRange InputStream::Lines() & +InputStreamIterators::LinesRange InputStream::Lines() & { - return LinesRange(*this); + return InputStreamIterators::LinesRange(*this); } size_t InputStream::BytesRead() const @@ -136,6 +136,8 @@ size_t InputStream::BytesRead() const return mBytesRead; } +namespace InputStreamIterators +{ LinesIterator::LinesIterator(InputStream& stream) : mStream(stream) { @@ -267,5 +269,5 @@ LinesIterator LinesRange::end() const { return LinesIterator(mStream); } - +} // namespace InputStreamIterators } // namespace ComplianceEngine diff --git a/src/modules/complianceengine/src/lib/InputStream.h b/src/modules/complianceengine/src/lib/InputStream.h index 104b7d8633..1fd85c827c 100644 --- a/src/modules/complianceengine/src/lib/InputStream.h +++ b/src/modules/complianceengine/src/lib/InputStream.h @@ -11,7 +11,10 @@ namespace ComplianceEngine { +namespace InputStreamIterators +{ class LinesRange; +} // namespace InputStreamIterators // This class wraps the C++ std::ifstream with // CE-specific error handling and a read size limit. @@ -54,7 +57,7 @@ class InputStream const std::string& GetFileName() const; // Obtains a range for line-by-line range-based iteration. - LinesRange Lines() &; + InputStreamIterators::LinesRange Lines() &; // Obtains the number of bytes read so far. std::size_t BytesRead() const; @@ -77,6 +80,8 @@ class InputStream std::size_t mBytesRead = 0; }; +namespace InputStreamIterators +{ // This class allows to iterate a file line by line. class LinesIterator { @@ -129,6 +134,7 @@ class LinesRange private: InputStream& mStream; }; +} // namespace InputStreamIterators } // namespace ComplianceEngine #endif // COMPLIANCEENGINE_FILE_STREAM_H From 82b2b07a53ed5107e66dba43e70cef39e6ea42de Mon Sep 17 00:00:00 2001 From: Robert Wojciechowski Date: Fri, 14 Nov 2025 14:17:25 +0100 Subject: [PATCH 4/6] Add RHEL-7 fallback with unique ptr, additinoal cosmetics --- .../complianceengine/src/lib/Evaluator.h | 4 +- .../complianceengine/src/lib/InputStream.cpp | 38 ++++++++++--------- .../complianceengine/src/lib/InputStream.h | 4 +- .../src/lib/procedures/FileRegexMatch.cpp | 3 +- .../src/lib/procedures/PackageInstalled.cpp | 29 ++++++-------- 5 files changed, 37 insertions(+), 41 deletions(-) diff --git a/src/modules/complianceengine/src/lib/Evaluator.h b/src/modules/complianceengine/src/lib/Evaluator.h index 86b0a32042..95b5be7f96 100644 --- a/src/modules/complianceengine/src/lib/Evaluator.h +++ b/src/modules/complianceengine/src/lib/Evaluator.h @@ -15,10 +15,10 @@ #include #include -#define AssertResult(variable, fmt, ...) \ +#define AssertResult(variable, ...) \ if (!(variable).HasValue()) \ { \ - OsConfigLogError(context.GetLogHandle(), fmt, __VA_ARGS__); \ + OsConfigLogError(context.GetLogHandle(), __VA_ARGS__); \ return (variable).Error(); \ } diff --git a/src/modules/complianceengine/src/lib/InputStream.cpp b/src/modules/complianceengine/src/lib/InputStream.cpp index da5546f525..ac767df096 100644 --- a/src/modules/complianceengine/src/lib/InputStream.cpp +++ b/src/modules/complianceengine/src/lib/InputStream.cpp @@ -15,7 +15,6 @@ InputStream::InputStream(string fileName, ContextInterface& context) : mContext(context), mFileName(std::move(fileName)) { - assert(!mStream.is_open()); } InputStream::InputStream(InputStream&& other) noexcept @@ -23,7 +22,8 @@ InputStream::InputStream(InputStream&& other) noexcept mFileName(std::move(other.mFileName)), mStream(std::move(other.mStream)) { - assert(mStream.is_open()); + assert(mStream); + assert(mStream->is_open()); } InputStream& InputStream::operator=(InputStream&& other) noexcept @@ -35,7 +35,8 @@ InputStream& InputStream::operator=(InputStream&& other) noexcept assert(&mContext == &other.mContext); mFileName = std::move(other.mFileName); mStream = std::move(other.mStream); - assert(mStream.is_open()); + assert(mStream); + assert(mStream->is_open()); return *this; } @@ -50,20 +51,21 @@ Result InputStream::Open(const string& fileName, ContextInterface& } InputStream result(fileName, context); - result.mStream.open(context.GetSpecialFilePath(fileName)); - if (!result.mStream.is_open()) + result.mStream.reset(new std::ifstream(context.GetSpecialFilePath(fileName))); + if (!result.mStream->is_open()) { OsConfigLogInfo(context.GetLogHandle(), "Failed to open '%s'", result.mFileName.c_str()); return Error(string("failed to open '") + result.mFileName + "'"); } - assert(result.mStream.is_open()); + assert(result.Good()); return result; } Result InputStream::ReadLine() { - assert(mStream.is_open()); + assert(mStream); + assert(mStream->is_open()); if (mBytesRead >= cMaxReadSize) { OsConfigLogError(mContext.GetLogHandle(), "Maximum file '%s' read size reached", mFileName.c_str()); @@ -71,14 +73,14 @@ Result InputStream::ReadLine() } // We want to always check with Good() before reading - if (mStream.eof()) + if (mStream->eof()) { OsConfigLogError(mContext.GetLogHandle(), "Attempted to read file '%s' after EOF", mFileName.c_str()); return Error(string("attempted to read file '") + mFileName + "' after EOF", EBADFD); } // We won't return empty strings in case an error happened previously - if (mStream.fail() || mStream.bad()) + if (mStream->fail() || mStream->bad()) { OsConfigLogError(mContext.GetLogHandle(), "Attempted to read file '%s' after failure", mFileName.c_str()); return Error(string("attempted to read file '") + mFileName + "' after failure", EBADFD); @@ -88,20 +90,20 @@ Result InputStream::ReadLine() // Read the data string line; - std::getline(mStream, line); + std::getline(*mStream, line); // Include the line size without the newline character mBytesRead += line.size(); - if (!mStream.eof()) + if (!mStream->eof()) { // Here we know a newline has been parsed. ++mBytesRead; } - if (mStream.bad()) + if (mStream->bad()) { // fail() may return true here in case there's no trailing newline character, so we stick to bad(). - OsConfigLogError(mContext.GetLogHandle(), "Failed to read line from '%s': %d", mFileName.c_str(), static_cast(mStream.rdstate())); + OsConfigLogError(mContext.GetLogHandle(), "Failed to read line from '%s': %d", mFileName.c_str(), static_cast(mStream->rdstate())); return Error("failed to read line from '" + mFileName + "'", EBADFD); } @@ -110,19 +112,19 @@ Result InputStream::ReadLine() bool InputStream::Good() const { - assert(mStream.is_open()); - return mBytesRead < cMaxReadSize && mStream.good(); + assert(mStream->is_open()); + return mBytesRead < cMaxReadSize && mStream->good(); } bool InputStream::AtEnd() const { - assert(mStream.is_open()); - return mStream.eof(); + assert(mStream->is_open()); + return mStream->eof(); } const string& InputStream::GetFileName() const { - assert(mStream.is_open()); + assert(mStream->is_open()); return mFileName; } diff --git a/src/modules/complianceengine/src/lib/InputStream.h b/src/modules/complianceengine/src/lib/InputStream.h index 1fd85c827c..4b8dcbe261 100644 --- a/src/modules/complianceengine/src/lib/InputStream.h +++ b/src/modules/complianceengine/src/lib/InputStream.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace ComplianceEngine { @@ -74,7 +75,8 @@ class InputStream std::string mFileName; // The underlying stream. - std::ifstream mStream; + // Note: wrapped with unique ptr as fstream is not movable on RHEL7-based platforms. + std::unique_ptr mStream; // The number of bytes read so far. std::size_t mBytesRead = 0; diff --git a/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp b/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp index b3da6662db..dd8aa12d8d 100644 --- a/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp +++ b/src/modules/complianceengine/src/lib/procedures/FileRegexMatch.cpp @@ -50,8 +50,7 @@ Result MultilineMatch(const std::string& filename, const string& matchPatt } int lineNumber = 0; - // Special case for empty files, read empty line then - for (auto line : input->Lines()) + for (const auto& line : input->Lines()) { AssertResult(line, "Failed to read '%s'", filename.c_str()); lineNumber++; diff --git a/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp b/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp index c27bbc1ce4..a5db3e7c01 100644 --- a/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp +++ b/src/modules/complianceengine/src/lib/procedures/PackageInstalled.cpp @@ -86,23 +86,21 @@ Result LoadPackageCache(const std::string& path, ContextInterface& cache.lastUpdateTime = 0; cache.packageManager = PackageManagerType::Autodetect; cache.packages.clear(); - auto cacheFileResult = ComplianceEngine::InputStream::Open(path, context); - if (!cacheFileResult.HasValue()) - { - return cacheFileResult.Error(); - } - auto cacheFile = std::move(cacheFileResult.Value()); + auto cacheFile = ComplianceEngine::InputStream::Open(path, context); + AssertResult(cacheFile, "Failed to open cache file"); - auto header = cacheFile.ReadLine(); - if (!header.HasValue()) + auto header = cacheFile->ReadLine(); + AssertResult(header, "Failed to read cache header"); + if (0 != header->find(pkgCacheHeader, 0)) { - OsConfigLogError(context.GetLogHandle(), "Failed to read '%s'", path.c_str()); - return header.Error(); + OsConfigLogError(context.GetLogHandle(), "Invalid cache file format"); + return Error("Invalid cache file format"); } - auto separatorPos = header->find('@'); + const auto separatorPos = header->find('@'); if (std::string::npos == separatorPos) { + OsConfigLogError(context.GetLogHandle(), "Invalid cache file format"); return Error("Invalid cache file header format"); } @@ -122,14 +120,9 @@ Result LoadPackageCache(const std::string& path, ContextInterface& return Error("Invalid timestamp in cache file header"); } - while (cacheFile.Good()) + for (auto line : cacheFile->Lines()) { - auto line = cacheFile.ReadLine(); - if (!line) - { - OsConfigLogError(context.GetLogHandle(), "%s", line.Error().message.c_str()); - return line.Error(); - } + AssertResult(line, "Failed to read cache entry"); if (!line->empty()) { From 334e1666198694ed446edc2784cf932696f84338 Mon Sep 17 00:00:00 2001 From: Robert Wojciechowski Date: Fri, 14 Nov 2025 14:24:13 +0100 Subject: [PATCH 5/6] Cosmetics --- src/modules/complianceengine/src/lib/InputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/complianceengine/src/lib/InputStream.cpp b/src/modules/complianceengine/src/lib/InputStream.cpp index ac767df096..0393c807be 100644 --- a/src/modules/complianceengine/src/lib/InputStream.cpp +++ b/src/modules/complianceengine/src/lib/InputStream.cpp @@ -59,7 +59,7 @@ Result InputStream::Open(const string& fileName, ContextInterface& } assert(result.Good()); - return result; + return Result(std::move(result)); } Result InputStream::ReadLine() From 073b08ec5401d63720c240950109d719a3e8433e Mon Sep 17 00:00:00 2001 From: Robert Wojciechowski Date: Fri, 14 Nov 2025 14:30:29 +0100 Subject: [PATCH 6/6] Cosmetics #2 --- src/modules/complianceengine/src/lib/InputStream.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/modules/complianceengine/src/lib/InputStream.cpp b/src/modules/complianceengine/src/lib/InputStream.cpp index 0393c807be..7c05a6e0df 100644 --- a/src/modules/complianceengine/src/lib/InputStream.cpp +++ b/src/modules/complianceengine/src/lib/InputStream.cpp @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include #include #include #include @@ -252,12 +253,14 @@ LinesRange::LinesRange(LinesRange&& other) noexcept LinesRange& LinesRange::operator=(const LinesRange& other) { + UNUSED(other); assert(&mStream == &other.mStream); return *this; } LinesRange& LinesRange::operator=(LinesRange&& other) noexcept { + UNUSED(other); assert(&mStream == &other.mStream); return *this; }