diff --git a/src/AntiDupl/AntiDupl.cpp b/src/AntiDupl/AntiDupl.cpp
index 0890612f..9d289f47 100644
--- a/src/AntiDupl/AntiDupl.cpp
+++ b/src/AntiDupl/AntiDupl.cpp
@@ -80,6 +80,8 @@ typedef ad::TEngine* adEngineHandle;
if(p == NULL) \
return AD_ERROR_INVALID_POINTER;
+#define AD_DEBUG(msg) OutputDebugStringA(msg)
+
BOOL APIENTRY DllMain(HMODULE hModule, DWORD dwReasonForCall, LPVOID lpReserved)
{
@@ -153,9 +155,14 @@ DLLAPI adEngineHandle adCreateA(const adCharA * userPath)
+// Creates a new ad::TEngine for the given wide-character user path and returns it as a handle.
+// Crash-dump support is initialised first via ad::DumpInit(); each step is traced with AD_DEBUG.
+// NOTE(review): the engine is allocated with `new`; presumably the caller frees it through adRelease - confirm.
DLLAPI adEngineHandle adCreateW(const adCharW * userPath)
{
+ AD_DEBUG("adCreateW: Starting\n");
ad::DumpInit();
+ AD_DEBUG("adCreateW: DumpInit done\n");
- return new ad::TEngine(ad::TString(userPath));
+ adEngineHandle handle = new ad::TEngine(ad::TString(userPath));
+ AD_DEBUG("adCreateW: TEngine created\n");
+
+ return handle;
}
DLLAPI adError adRelease(adEngineHandle handle)
diff --git a/src/AntiDupl/AntiDupl.vcxproj b/src/AntiDupl/AntiDupl.vcxproj
index e494d05a..492f37e0 100644
--- a/src/AntiDupl/AntiDupl.vcxproj
+++ b/src/AntiDupl/AntiDupl.vcxproj
@@ -23,6 +23,7 @@
DynamicLibrary
+
v143
@@ -51,14 +52,22 @@
- _WINDOWS;_USRDLL;ANTIDUPL_EXPORTS;%(PreprocessorDefinitions)
+ _WINDOWS;_USRDLL;ANTIDUPL_EXPORTS;ENABLE_CUDA;%(PreprocessorDefinitions)
4267
%(AdditionalIncludeDirectories)
+ cudart.lib;%(AdditionalDependencies)
Windows
MachineX64
+
+ 64
+
+
+ 64
+ compute_89,sm_89
+
"$(ProjectDir)".\adExternal.cmd
@@ -78,6 +87,7 @@
+
@@ -113,6 +123,7 @@
+
@@ -128,6 +139,8 @@
+
+
@@ -169,4 +182,7 @@
+
+
+
\ No newline at end of file
diff --git a/src/AntiDupl/adConfig.h b/src/AntiDupl/adConfig.h
index 4bb98959..656d63e9 100644
--- a/src/AntiDupl/adConfig.h
+++ b/src/AntiDupl/adConfig.h
@@ -45,8 +45,8 @@
#include
//#define AD_PERFORMANCE_TEST_ENABLE
-//#define AD_LOGGER_ENABLE
-//#define AD_DUMP_ENABLE
+#define AD_LOGGER_ENABLE
+#define AD_DUMP_ENABLE
#define AD_TURBO_JPEG_ENABLE
#include "adSimd.h"
diff --git a/src/AntiDupl/adDataCollector.cpp b/src/AntiDupl/adDataCollector.cpp
index 8ef65a35..bf599d90 100644
--- a/src/AntiDupl/adDataCollector.cpp
+++ b/src/AntiDupl/adDataCollector.cpp
@@ -32,11 +32,16 @@
#include "adImageUtils.h"
#include "adPixelData.h"
#include "adBlurringDetector.h"
+#include "adGPUManager.h"
+#include
+
+#define AD_DEBUG(msg) OutputDebugStringA(msg)
namespace ad
{
TDataCollector::TDataCollector(TEngine *pEngine)
- :m_pOptions(pEngine->Options()),
+ :m_pEngine(pEngine),
+ m_pOptions(pEngine->Options()),
m_pResult(pEngine->Result())
{
for(int size = INITIAL_REDUCED_IMAGE_SIZE; size > m_pOptions->advanced.reducedImageSize; size >>= 1)
@@ -73,7 +78,7 @@ namespace ad
TImage *pImage = TImage::Load(pImageData->hGlobal, m_pOptions);
if(pImage)
{
- pImageData->height = (TUInt32)pImage->View()->height;
+ pImageData->height = (TUInt32)pImage->View()->height;
pImageData->width = (TUInt32)pImage->View()->width;
pImageData->type = (TImageType)pImage->Format();
@@ -98,13 +103,46 @@ namespace ad
pImageData->imageExif = pImage->ImageExif();
- Simd::ResizeBilinear(gray, *m_pGrayBuffers.front());
+ Simd::Resize(gray, *m_pGrayBuffers.front());
for(size_t i = 1; i < m_pGrayBuffers.size(); ++i)
Simd::ReduceGray2x2(*m_pGrayBuffers[i - 1], *m_pGrayBuffers[i]);
TPixelData & data = *pImageData->data;
ReduceGray2x2(*m_pGrayBuffers.back(), TView(data.side, data.side, data.side, TView::Gray8, data.main));
data.filled = true;
+ // Upload thumbnail to GPU — only for non-AllVsAll mode
+ // In AllVsAll mode, ExecuteGpuAllVsAllComparison does its own mass upload
+ if (!m_pEngine->SkipComparisonDuringCollection())
+ {
+ // Ensure GPU buffer is initialized before first upload
+ static bool gpuBufferInitialized = false;
+ if (!gpuBufferInitialized)
+ {
+ AD_DEBUG("FillPixelData: Initializing GPU buffer\n");
+ size_t estimatedCapacity = 10000; // Start with reasonable estimate
+ size_t thumbSize = m_pOptions->advanced.reducedImageSize * m_pOptions->advanced.reducedImageSize;
+ if (m_pEngine->GpuManager()->EnsureCapacity(estimatedCapacity, thumbSize))
+ {
+ gpuBufferInitialized = true;
+ AD_DEBUG("FillPixelData: GPU buffer initialized\n");
+ }
+ else
+ {
+ AD_DEBUG("FillPixelData: GPU buffer initialization FAILED\n");
+ }
+ }
+
+ // Upload thumbnail to GPU immediately
+ if (m_pEngine->GpuManager()->UploadThumbnail(pImageData->globalIdx, data.main))
+ {
+ // Successfully uploaded
+ }
+ else
+ {
+ AD_DEBUG("FillPixelData: UploadThumbnail FAILED\n");
+ }
+ }
+
delete pImage;
}
else
diff --git a/src/AntiDupl/adDataCollector.h b/src/AntiDupl/adDataCollector.h
index 75647891..df87c0b8 100644
--- a/src/AntiDupl/adDataCollector.h
+++ b/src/AntiDupl/adDataCollector.h
@@ -38,6 +38,7 @@ namespace ad
//-------------------------------------------------------------------------
class TDataCollector
{
+ TEngine *m_pEngine;
TOptions *m_pOptions;
TResultStorage *m_pResult;
std::vector m_pGrayBuffers;
diff --git a/src/AntiDupl/adDump.cpp b/src/AntiDupl/adDump.cpp
index ee73bc4e..1b02b70b 100644
--- a/src/AntiDupl/adDump.cpp
+++ b/src/AntiDupl/adDump.cpp
@@ -31,7 +31,7 @@
namespace ad
{
- const DWORD STATUS_FATAL_APP_EXIT = 0x40000015;
+ const DWORD AD_STATUS_FATAL_APP_EXIT = 0x40000015;
typedef BOOL (*MiniDumpWriteDumpPtr)(
HANDLE hProcess,
@@ -107,7 +107,7 @@ namespace ad
{
DWORD exception_code = info->ExceptionRecord->ExceptionCode;
void *address = info->ExceptionRecord->ExceptionAddress;
- if (exception_code == STATUS_FATAL_APP_EXIT)
+ if (exception_code == AD_STATUS_FATAL_APP_EXIT)
{// abort()
MessageBox(NULL, TEXT("Program aborted."),
TEXT("Crash report"), MB_OK | MB_ICONERROR);
diff --git a/src/AntiDupl/adEngine.cpp b/src/AntiDupl/adEngine.cpp
index 73c22d15..ebd486b2 100644
--- a/src/AntiDupl/adEngine.cpp
+++ b/src/AntiDupl/adEngine.cpp
@@ -36,27 +36,112 @@
#include "adPerformance.h"
#include "adLogger.h"
#include "adFileUtils.h"
+#include "adGPUManager.h"
+#include "adGPU.h"
+#include "adStatus.h"
+#include
+#include
+#include
+
+// Debug tracing helper: forwards a fixed message to OutputDebugStringA.
+#define AD_DEBUG(msg) OutputDebugStringA(msg)
+// printf-style tracing helper: formats into a 512-byte stack buffer with snprintf
+// (so longer messages are truncated) and forwards the result to OutputDebugStringA.
+#define AD_DEBUG_FMT(msg, ...) \
+ do { \
+ char buf[512]; \
+ snprintf(buf, sizeof(buf), msg, __VA_ARGS__); \
+ OutputDebugStringA(buf); \
+ } while(0)
namespace ad
{
+ // Constructs the engine for the given user path.
+ // Order of work: optional file logger, TInit, TOptions, TStatus, the GPU manager
+ // (followed by a logged sanity check when a GPU is available), then the storage,
+ // compare/collect managers and the searcher. Each step is traced with AD_DEBUG.
+ // The GPU sanity check compares a CPU-computed squared-difference sum of two 1024-byte
+ // test patterns with GpuCompareSquaredSum; its outcome is only logged here and does not
+ // disable the GPU path.
TEngine::TEngine(const TString & userPath)
: _userPath(userPath)
{
+ AD_DEBUG("TEngine: Constructor starting\n");
+
#ifdef AD_LOGGER_ENABLE
TLogger::s_logger.SetFileOut((UserPath() + TEXT("\\debug_log.txt")).c_str(), true);
#endif//AD_LOGGER_ENABLE
+
+ AD_DEBUG("TEngine: Creating TInit\n");
m_pInit = new TInit();
+
+ AD_DEBUG("TEngine: Creating TOptions\n");
m_pOptions = new TOptions(userPath);
+
+ AD_DEBUG("TEngine: Creating TStatus\n");
m_pStatus = new TStatus();
+
+ AD_DEBUG("TEngine: Creating TGpuManager\n");
+ m_pGpuManager = new TGpuManager();
+
+ // Report the real availability instead of a hard-coded "1", which was wrong on machines without a usable GPU.
+ AD_DEBUG_FMT("TEngine: TGpuManager created, IsAvailable=%d\n", m_pGpuManager->IsAvailable() ? 1 : 0);
+
+ if (m_pGpuManager->IsAvailable())
+ {
+ AD_DEBUG("TEngine: GPU is available, getting device info\n");
+
+ const GpuDeviceInfo& info = m_pGpuManager->DeviceInfo();
+ std::stringstream ss;
+ ss << "GPU acceleration initialized: " << info.name
+ << " (" << (info.totalGlobalMem / (1024 * 1024)) << " MB VRAM, Compute "
+ << info.computeMajor << "." << info.computeMinor << ")";
+#ifdef AD_LOGGER_ENABLE
+ AD_LOG(ss.str().c_str());
+#endif//AD_LOGGER_ENABLE
+
+ // GPU Sanity Check: Test mathematical parity
+ AD_DEBUG("TEngine: Starting GPU sanity check\n");
+
+ const size_t testSize = 1024;
+ uint8_t h_test1[testSize], h_test2[testSize];
+ double cpuSum = 0;
+ // Fill two complementary byte ramps and accumulate the reference sum of squared differences on the CPU.
+ for(size_t i = 0; i < testSize; ++i) {
+ h_test1[i] = (uint8_t)(i % 256);
+ h_test2[i] = (uint8_t)(255 - (i % 256));
+ double diff = (double)h_test1[i] - (double)h_test2[i];
+ cpuSum += diff * diff;
+ }
+
+ AD_DEBUG("TEngine: Calling GpuCompareSquaredSum\n");
+
+ double gpuSum = GpuCompareSquaredSum(h_test1, h_test2, testSize);
+
+ AD_DEBUG("TEngine: GpuCompareSquaredSum returned\n");
+
+ std::stringstream ts;
+ ts << "CUDA Sanity Check: CPU=" << cpuSum << ", GPU=" << gpuSum;
+ double tolerance = cpuSum * 0.001; // 0.1% relative tolerance
+ if (fabs(cpuSum - gpuSum) <= tolerance) {
+ ts << " [SUCCESS - PARITY MATCH]";
+ } else {
+ ts << " [FAILURE - MATH MISMATCH] Tolerance: " << tolerance;
+ }
+#ifdef AD_LOGGER_ENABLE
+ AD_LOG(ts.str().c_str());
+#endif//AD_LOGGER_ENABLE
+ }
+ else
+ {
+ AD_DEBUG("TEngine: GPU not available\n");
+#ifdef AD_LOGGER_ENABLE
+ AD_LOG("GPU acceleration not available.");
+#endif//AD_LOGGER_ENABLE
+ }
+
+ AD_DEBUG("TEngine: Creating storage objects\n");
+
m_pMistakeStorage = new TMistakeStorage(this);
m_pImageDataStorage = new TImageDataStorage(this);
- m_pRecycleBin = new TRecycleBin(this);
+ m_pRecycleBin = new TRecycleBin(this);
m_pResult = new TResultStorage(this);
m_pImageDataPtrs = new TImageDataPtrs();
m_pCriticalSection = new TCriticalSection();
m_pCompareManager = new TCompareManager(this);
m_pCollectManager = new TCollectManager(this, m_pCompareManager);
m_pSearcher = new TSearcher(this, m_pImageDataPtrs);
+ m_skipComparisonDuringCollection = false;
+
+ AD_DEBUG("TEngine: Constructor finished successfully\n");
}
TEngine::~TEngine()
@@ -71,6 +156,7 @@ namespace ad
delete m_pCollectManager;
delete m_pSearcher;
delete m_pRecycleBin;
+ delete m_pGpuManager;
delete m_pStatus;
delete m_pOptions;
#ifdef AD_LOGGER_ENABLE
@@ -82,42 +168,307 @@ namespace ad
#endif//AD_LOGGER_ENABLE
}
+ // Uploads every filled thumbnail held in the image data storage to the GPU manager.
+ // Does nothing beyond tracing when there is no GPU manager or the GPU is unavailable.
+ // Returns early (after logging) when EnsureCapacity fails for storage.size() thumbnails
+ // of reducedImageSize * reducedImageSize bytes each.
+ void TEngine::UpdateGpuDatabase()
+ {
+ AD_DEBUG("UpdateGpuDatabase: Starting\n");
+
+ if (m_pGpuManager && m_pGpuManager->IsAvailable())
+ {
+ AD_DEBUG("UpdateGpuDatabase: GPU is available\n");
+
+ const TImageDataStorage::TStorage& storage = m_pImageDataStorage->Storage();
+ AD_DEBUG("UpdateGpuDatabase: Storage size\n");
+
+ size_t reducedImageSize = m_pOptions->advanced.reducedImageSize;
+ size_t thumbSize = reducedImageSize * reducedImageSize;
+ AD_DEBUG("UpdateGpuDatabase: reducedImageSize and thumbSize calculated\n");
+
+ // Ensure GPU has enough capacity for the current database
+ AD_DEBUG("UpdateGpuDatabase: Calling EnsureCapacity\n");
+
+ if (!m_pGpuManager->EnsureCapacity(storage.size(), thumbSize))
+ {
+ AD_DEBUG("UpdateGpuDatabase: EnsureCapacity FAILED\n");
+#ifdef AD_LOGGER_ENABLE
+ AD_LOG("GPU: Failed to ensure capacity for database.");
+#endif
+ return;
+ }
+
+ AD_DEBUG("UpdateGpuDatabase: EnsureCapacity succeeded\n");
+
+ // Count of thumbnails that were uploaded successfully; only used for the log line below.
+ size_t count = 0;
+ for (TImageDataStorage::TStorage::const_iterator it = storage.begin(); it != storage.end(); ++it)
+ {
+ TImageDataPtr pImageData = it->second;
+ // Skip entries whose pixel data is missing or has not been filled yet.
+ if (pImageData->data && pImageData->data->filled && pImageData->data->main != nullptr)
+ {
+ // NOTE(review): capacity is sized by storage.size() but the slot is addressed by globalIdx - confirm globalIdx < storage.size().
+ if (m_pGpuManager->UploadThumbnail(pImageData->globalIdx, pImageData->data->main))
+ {
+ count++;
+ }
+ else
+ {
+ AD_DEBUG("UpdateGpuDatabase: Upload FAILED\n");
+ }
+ }
+ }
+ AD_DEBUG("UpdateGpuDatabase: Uploaded thumbnails\n");
+
+#ifdef AD_LOGGER_ENABLE
+ if (count > 0)
+ {
+ std::stringstream ss;
+ ss << "GPU: Synchronized " << count << " thumbnails to VRAM.";
+ AD_LOG(ss.str().c_str());
+ }
+#endif
+ }
+ else
+ {
+ AD_DEBUG("UpdateGpuDatabase: GPU not available\n");
+ }
+ AD_DEBUG("UpdateGpuDatabase: Finished\n");
+ }
+
+ // Context structure passed to the match callback
+ struct MatchProcessContext {
+ TEngine* engine; // engine whose result storage and status receive the matches
+ const std::vector* imageByIndex; // maps a match's image index to its image data pointer
+ size_t thumbSize; // set by the caller; not read by MatchCallback
+ double maxDifference; // set by the caller; not read by MatchCallback
+ size_t totalProcessed; // number of pairs added to the result so far
+ size_t bufferFullCount; // initialised to 0 by the caller; not read by MatchCallback
+ };
+
+ // Callback function for streaming processing of matches.
+ // `batch` points to `count` Match records; `context` is a MatchProcessContext*.
+ // Each valid match is added to the engine's result storage as a duplicate pair.
+ static void MatchCallback(const void* batch, size_t count, void* context) {
+ MatchProcessContext* ctx = (MatchProcessContext*)context;
+ const Match* matches = (const Match*)batch;
+
+ for (size_t i = 0; i < count; i++) {
+ // Validate the indices
+ if (matches[i].image1 >= ctx->imageByIndex->size() ||
+ matches[i].image2 >= ctx->imageByIndex->size()) {
+ continue;
+ }
+
+ TImageDataPtr pImage1 = ctx->imageByIndex->at(matches[i].image1);
+ TImageDataPtr pImage2 = ctx->imageByIndex->at(matches[i].image2);
+
+ // Skip pairs containing nullptr (images without data)
+ if (!pImage1 || !pImage2) {
+ continue;
+ }
+
+ ctx->engine->Result()->AddDuplImagePair(pImage1, pImage2, matches[i].difference, AD_TRANSFORM_TURN_0);
+ ctx->totalProcessed++;
+
+ // Update progress (for GPU mode)
+ if (ctx->totalProcessed % 10000 == 0) {
+ ctx->engine->Status()->SetProgress(ctx->totalProcessed, ctx->totalProcessed); // dummy update
+ }
+ }
+ }
+
+ // NEW: GPU AllVsAll comparison with streaming processing
+ // Returns true on success, false on failure.
+ // Also returns false when the GPU is unavailable, the storage is empty, or fewer than
+ // two images have filled thumbnail data. Matches are delivered through MatchCallback.
+ bool TEngine::ExecuteGpuAllVsAllComparison()
+ {
+ AD_DEBUG("ExecuteGpuAllVsAllComparison: Starting\n");
+
+ if (!m_pGpuManager || !m_pGpuManager->IsAvailable()) {
+ AD_DEBUG("ExecuteGpuAllVsAllComparison: GPU not available\n");
+ return false;
+ }
+
+ const TImageDataStorage::TStorage& storage = m_pImageDataStorage->Storage();
+ size_t count = storage.size();
+ if (count == 0) {
+ AD_DEBUG("ExecuteGpuAllVsAllComparison: Empty storage\n");
+ return false;
+ }
+
+ size_t reducedImageSize = m_pOptions->advanced.reducedImageSize;
+ size_t thumbSize = reducedImageSize * reducedImageSize;
+
+ AD_DEBUG_FMT("ExecuteGpuAllVsAllComparison: Preparing data for %zu images\n", count);
+
+ // Collect ONLY the valid thumbnails into a compact array
+ // This prevents comparing images that have no data
+ std::vector allThumbnails;
+ std::vector allCrcArray;
+ std::vector imageByIndex;
+ allThumbnails.reserve(count * thumbSize);
+ allCrcArray.reserve(count);
+ imageByIndex.reserve(count);
+
+ size_t validCount = 0;
+
+ for (TImageDataStorage::TStorage::const_iterator it = storage.begin(); it != storage.end(); ++it) {
+ TImageDataPtr pImageData = it->second;
+ if (pImageData->data && pImageData->data->filled && pImageData->data->main != nullptr) {
+ // Copy the thumbnail
+ allThumbnails.resize((validCount + 1) * thumbSize);
+ memcpy(&allThumbnails[validCount * thumbSize], pImageData->data->main, thumbSize);
+
+ // Copy the CRC
+ allCrcArray.push_back(pImageData->crc32c);
+
+ // Store the pointer
+ imageByIndex.push_back(pImageData);
+
+ validCount++;
+ }
+ }
+
+ AD_DEBUG_FMT("ExecuteGpuAllVsAllComparison: %zu valid thumbnails out of %zu\n", validCount, count);
+
+ if (validCount < 2) {
+ AD_DEBUG("ExecuteGpuAllVsAllComparison: Not enough valid images\n");
+ return false;
+ }
+
+ // Compute threshold and maxDifference the same way as the original TImageComparer
+ int thresholdPerPixel = Simd::Square(m_pOptions->compare.thresholdDifference * PIXEL_MAX_DIFFERENCE) /
+ Simd::Square(DENOMINATOR);
+ int mainThreshold = (int)(thumbSize * thresholdPerPixel);
+ double threshold = (double)mainThreshold;
+ double maxDifference = (double)(Simd::Square(PIXEL_MAX_DIFFERENCE) * thumbSize);
+
+ AD_DEBUG_FMT("ExecuteGpuAllVsAllComparison: threshold=%f, maxDifference=%f\n", threshold, maxDifference);
+
+ // Streaming processing context
+ MatchProcessContext ctx;
+ ctx.engine = this;
+ ctx.imageByIndex = &imageByIndex;
+ ctx.thumbSize = thumbSize;
+ ctx.maxDifference = maxDifference;
+ ctx.totalProcessed = 0;
+ ctx.bufferFullCount = 0;
+
+ // Batch size for streaming readback: 5M matches = 60MB RAM
+ const size_t BATCH_MATCHES = 5000000;
+
+ AD_DEBUG_FMT("ExecuteGpuAllVsAllComparison: Calling GPU with %zu valid images (batch size: %zu)\n", validCount, BATCH_MATCHES);
+
+ bool success = m_pGpuManager->CompareAllVsAll(
+ allThumbnails.data(),
+ allCrcArray.data(),
+ validCount, // Use validCount instead of count
+ thumbSize,
+ threshold,
+ maxDifference,
+ ADDITIONAL_DIFFERENCE_FOR_DIFFERENT_CRC32,
+ &ctx,
+ MatchCallback,
+ BATCH_MATCHES);
+
+ if (success) {
+ AD_DEBUG_FMT("ExecuteGpuAllVsAllComparison: Processed %zu total matches\n", ctx.totalProcessed);
+ }
+ else {
+ AD_DEBUG("ExecuteGpuAllVsAllComparison: GPU comparison FAILED\n");
+ }
+
+ // Release the large buffers early
+ allThumbnails.clear();
+ allThumbnails.shrink_to_fit();
+ allCrcArray.clear();
+ allCrcArray.shrink_to_fit();
+ imageByIndex.clear();
+ imageByIndex.shrink_to_fit();
+
+ AD_DEBUG("ExecuteGpuAllVsAllComparison: Finished\n");
+ return success;
+ }
+
+ // Runs one full search: clears status and results, lets the searcher enumerate images,
+ // feeds every image to the collect manager, and then compares them either with the GPU
+ // all-vs-all pass or with the CPU compare manager.
+ // Duplicate comparison (GPU or CPU) only runs when compare.checkOnEquality is TRUE.
+ // The GPU pass additionally requires an available GPU, the squared-sum algorithm and
+ // ignoreFrameWidth == 0. There is no CPU fallback when the GPU pass fails.
void TEngine::Search()
{
+ AD_DEBUG("Search: Starting\n");
+
AD_FUNCTION_PERFORMANCE_TEST
m_pStatus->ClearStatistic();
m_pStatus->SetProgress(0, 0);
m_pResult->Clear();
+ // 1. First, search for images on disk or load from DB
+ AD_DEBUG("Search: Calling SearchImages\n");
m_pSearcher->SearchImages();
+ AD_DEBUG("Search: SearchImages completed\n");
- if(m_pOptions->compare.checkOnEquality == TRUE)
- {
- m_pCompareManager->Start(m_pImageDataPtrs->size());
- m_pCompareManager->SetPriority(THREAD_PRIORITY_LOWEST);
- }
+ // 2. Start collection threads
+ AD_DEBUG("Search: Starting collection manager\n");
m_pCollectManager->Start();
m_pCollectManager->SetPriority(THREAD_PRIORITY_BELOW_NORMAL);
- size_t current = 0, total = m_pImageDataPtrs->size();
- for(TImageDataPtrs::iterator it = m_pImageDataPtrs->begin();
+ // 3. GPU AllVsAll comparison (if enabled and available)
+ // The GPU pass is a duplicate search, so it must honour checkOnEquality exactly like the CPU path does.
+ bool useGpu = (m_pGpuManager && m_pGpuManager->IsAvailable() &&
+ m_pOptions->compare.checkOnEquality == TRUE &&
+ m_pOptions->compare.algorithmComparing == AD_COMPARING_SQUARED_SUM &&
+ m_pOptions->advanced.ignoreFrameWidth == 0);
+
+ if (useGpu)
+ {
+ m_skipComparisonDuringCollection = true; // Disable the legacy comparison BEFORE the loop
+ }
+ else
+ {
+ m_skipComparisonDuringCollection = false;
+ // 4. CPU comparison (legacy approach) - CompareManager must be started BEFORE data collection
+ AD_DEBUG("Search: Starting CPU comparison\n");
+
+ if(m_pOptions->compare.checkOnEquality == TRUE)
+ {
+ AD_DEBUG("Search: Starting compare manager\n");
+ m_pCompareManager->Start(m_pImageDataPtrs->size());
+ m_pCompareManager->SetPriority(THREAD_PRIORITY_NORMAL);
+ AD_DEBUG("Search: Compare manager started\n");
+ }
+ }
+
+ size_t current = 0, total = m_pImageDataPtrs->size();
+ AD_DEBUG("Search: Total images to process\n");
+
+ for(TImageDataPtrs::iterator it = m_pImageDataPtrs->begin();
it != m_pImageDataPtrs->end() && !m_pStatus->Stopped(); ++it, ++current)
{
TImageDataPtr pImageData = *it;
m_pCollectManager->Add(pImageData);
m_pStatus->SetProgress(current, total);
}
+ AD_DEBUG("Search: Collection loop finished\n");
+
m_pCollectManager->Finish();
+ AD_DEBUG("Search: Collection manager finished\n");
- if(m_pOptions->compare.checkOnEquality == TRUE)
+ if (useGpu)
{
- m_pStatus->SetProgress(current, total);
- m_pStatus->Wait(AD_THREAD_TYPE_MAIN, 0);
- m_pCompareManager->SetPriority(THREAD_PRIORITY_NORMAL);
- m_pCompareManager->Finish();
+ AD_DEBUG("Search: Using GPU AllVsAll comparison\n");
+ bool gpuSuccess = ExecuteGpuAllVsAllComparison();
+ m_skipComparisonDuringCollection = false;
+
+ if (!gpuSuccess) {
+ AD_DEBUG("Search: GPU comparison FAILED — no CPU fallback (too slow for large collections)\n");
+ // CPU fallback removed — O(N^2) CPU comparison is impractical for 10K+ images
+ // User should retry with smaller collection or check GPU memory availability
+ }
+ else {
+ AD_DEBUG("Search: GPU comparison completed successfully\n");
+ }
+ }
+ else
+ {
+ if(m_pOptions->compare.checkOnEquality == TRUE)
+ {
+ AD_DEBUG("Search: Waiting for compare manager to finish\n");
+ m_pCompareManager->Finish();
+ AD_DEBUG("Search: Compare manager finished\n");
+ }
}
m_pImageDataPtrs->clear();
m_pStatus->Reset();
+
+ AD_DEBUG("Search: Completed successfully\n");
}
}
diff --git a/src/AntiDupl/adEngine.h b/src/AntiDupl/adEngine.h
index 1395855e..7fa07106 100644
--- a/src/AntiDupl/adEngine.h
+++ b/src/AntiDupl/adEngine.h
@@ -44,6 +44,7 @@ namespace ad
class TSearcher;
class TRecycleBin;
class TCriticalSection;
+ class TGpuManager;
typedef TImageData *TImageDataPtr;
//-------------------------------------------------------------------------
@@ -56,6 +57,13 @@ namespace ad
void Search();
+ void UpdateGpuDatabase();
+ bool ExecuteGpuAllVsAllComparison(); // Возвращает true при успехе
+
+ // Flag to skip comparison during collection (for GPU AllVsAll mode)
+ bool m_skipComparisonDuringCollection;
+ bool SkipComparisonDuringCollection() const { return m_skipComparisonDuringCollection; }
+
const TString & UserPath() const { return _userPath; }
TStatus* Status() {return m_pStatus;}
TOptions* Options() {return m_pOptions;};
@@ -64,6 +72,7 @@ namespace ad
TResultStorage* Result() {return m_pResult;}
TCriticalSection* CriticalSection() {return m_pCriticalSection;}
TRecycleBin* RecycleBin() {return m_pRecycleBin;}
+ TGpuManager* GpuManager() {return m_pGpuManager;}
private:
TString _userPath;
@@ -79,6 +88,7 @@ namespace ad
TInit *m_pInit;
TSearcher *m_pSearcher;
TRecycleBin *m_pRecycleBin;
+ TGpuManager *m_pGpuManager;
};
//-------------------------------------------------------------------------
}
diff --git a/src/AntiDupl/adGPU.cu b/src/AntiDupl/adGPU.cu
new file mode 100644
index 00000000..e4b45e05
--- /dev/null
+++ b/src/AntiDupl/adGPU.cu
@@ -0,0 +1,815 @@
+/*
+* AntiDuplPlus Program (http://github.com/Sucotasch/AntiDuplPlus).
+*
+* Copyright (c) 2023-2026.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to deal
+* in the Software without restriction, including without limitation the rights
+* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+#include "adGPU.h"
+#include "adLogger.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+// Debug tracing helper: forwards a fixed message to OutputDebugStringA.
+#define AD_DEBUG(msg) OutputDebugStringA(msg)
+// printf-style tracing helper: formats into a 512-byte stack buffer with snprintf
+// (so longer messages are truncated) and forwards the result to OutputDebugStringA.
+#define AD_DEBUG_FMT(msg, ...) \
+ do { \
+ char buf[512]; \
+ snprintf(buf, sizeof(buf), msg, __VA_ARGS__); \
+ OutputDebugStringA(buf); \
+ } while(0)
+
+namespace ad
+{
+ // Global VRAM Storage
+ // All pointers below are allocated together by GpuCreateBuffer and freed by GpuReleaseBuffer.
+ static uint8_t* g_pDeviceThumbnailBuffer = nullptr; // capacity * g_thumbSize bytes; one slot per thumbnail
+ static size_t g_bufferCapacity = 0; // number of thumbnail slots currently allocated
+ static size_t g_thumbSize = 1024; // Default 32x32
+ static uint8_t* g_pQueryBuffer = nullptr; // g_thumbSize bytes; holds the query thumbnail for one-vs-many/list comparisons
+ static double* g_pResultBuffer = nullptr; // capacity doubles; per-candidate squared-difference sums
+ static size_t* g_pIndexBuffer = nullptr; // capacity indices; candidate list for GpuCompareOneVsList
+
+ // --- Kernels ---
+
+ // Adds the sum of squared byte differences between pSrc1 and pSrc2 (first `size` bytes) into *pResult.
+ // Each thread handles one byte; the block reduces its partial sums in dynamic shared memory
+ // (one double per thread) and thread 0 adds the block total to *pResult with atomicAdd.
+ // Because the result is accumulated, *pResult must hold the desired starting value (e.g. 0) before launch.
+ // The halving reduction only covers every element when blockDim.x is a power of two.
+ __global__ void SquaredSumKernel(const uint8_t* pSrc1, const uint8_t* pSrc2, size_t size, double* pResult)
+ {
+ extern __shared__ double shared_data[];
+ size_t tid = threadIdx.x;
+ size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+ double sum = 0;
+ // Threads past the end of the input contribute 0 so the reduction below stays uniform.
+ if (i < size) {
+ double diff = (double)pSrc1[i] - (double)pSrc2[i];
+ sum = diff * diff;
+ }
+ shared_data[tid] = sum;
+ __syncthreads();
+ // Pairwise reduction of the per-thread values within the block.
+ for (size_t s = blockDim.x / 2; s > 0; s >>= 1) {
+ if (tid < s) shared_data[tid] += shared_data[tid + s];
+ __syncthreads();
+ }
+ if (tid == 0) atomicAdd(pResult, shared_data[0]);
+ }
+
+ // NEW: AllVsAll kernel — grid-stride loop + shared memory + CRC check + final difference
+ // Each block processes several rows i with a stride of gridDim.x
+ // Requires thumbSize bytes of dynamic shared memory per block.
+ // *matchCount is incremented for every pair under the threshold, even once the results
+ // buffer is full, so it may end up larger than maxMatches; only the first maxMatches
+ // reserved slots are written.
+ __global__ void AllVsAllKernel(
+ const uint8_t* thumbnails, // All thumbnails in VRAM
+ const uint64_t* crcArray, // CRC32c of each image
+ size_t thumbSize, // Size of one thumbnail (1024)
+ size_t count, // Total number of images
+ double threshold, // Squared-difference threshold
+ double maxDifference, // Maximum difference, used for normalization
+ double addDiffForCrcMismatch, // Penalty added when the CRCs differ
+ size_t maxMatches, // Maximum number of matches (bounds checking)
+ Match* results, // Sparse buffer for the results
+ size_t* matchCount) // Atomic counter
+ {
+ // Shared memory for thumb1 — speeds up reads by 10-100x (original author's estimate)
+ extern __shared__ uint8_t shared_thumb[];
+
+ // Grid-stride loop: each block processes several rows i
+ for (size_t i = blockIdx.x; i < count; i += gridDim.x) {
+ const uint8_t* thumb1_global = thumbnails + i * thumbSize;
+
+ // Load thumb1 into shared memory cooperatively
+ for (size_t p = threadIdx.x; p < thumbSize; p += blockDim.x) {
+ shared_thumb[p] = thumb1_global[p];
+ }
+ __syncthreads();
+
+ // Each thread processes several j > i with a stride
+ size_t numThreads = blockDim.x;
+
+ for (size_t j = i + 1 + threadIdx.x; j < count; j += numThreads) {
+ const uint8_t* thumb2 = thumbnails + j * thumbSize;
+
+ // Compute the squared difference against the shared-memory copy
+ double sumSqDiff = 0;
+ for (size_t p = 0; p < thumbSize; p++) {
+ double diff = (double)shared_thumb[p] - (double)thumb2[p];
+ sumSqDiff += diff * diff;
+ }
+
+ // Check the threshold BEFORE normalization (as in the CPU version)
+ if (sumSqDiff <= threshold) {
+ // Atomically reserve an index, with bounds checking
+ size_t idx = atomicAdd(matchCount, (size_t)1);
+
+ // PROTECTION against overflowing the buffer
+ if (idx < maxMatches) {
+ // Compute the final difference as a percentage
+ double difference = sqrt(sumSqDiff / maxDifference) * 100.0;
+
+ // Add the penalty for a CRC mismatch
+ if (crcArray[i] != crcArray[j]) {
+ difference += addDiffForCrcMismatch;
+ }
+
+ results[idx].image1 = (uint32_t)i;
+ results[idx].image2 = (uint32_t)j;
+ results[idx].difference = (float)difference;
+ }
+ }
+ }
+ __syncthreads(); // Synchronize before the next iteration of i
+ }
+ }
+
+ // Computes, for each of `count` consecutive database thumbnails, the sum of squared byte
+ // differences against the query thumbnail and stores it in pResults[block index].
+ // One block handles one database entry; its threads stride over the thumbnail bytes and
+ // reduce their partial sums in dynamic shared memory (one double per thread).
+ // The halving reduction only covers every element when blockDim.x is a power of two.
+ __global__ void OneVsManyKernel(const uint8_t* pQuery, const uint8_t* pDatabase,
+ size_t thumbSize, size_t count, double* pResults)
+ {
+ size_t dbIdx = blockIdx.x;
+ // The whole block exits together because dbIdx depends only on blockIdx.
+ if (dbIdx >= count) return;
+
+ extern __shared__ double shared_sum[];
+ size_t tid = threadIdx.x;
+
+ double localSum = 0;
+ const uint8_t* pTarget = pDatabase + (size_t)dbIdx * thumbSize;
+
+ // Per-thread partial sum over every blockDim.x-th byte.
+ for (size_t i = tid; i < thumbSize; i += blockDim.x) {
+ double diff = (double)pQuery[i] - (double)pTarget[i];
+ localSum += diff * diff;
+ }
+
+ shared_sum[tid] = localSum;
+ __syncthreads();
+
+ // Pairwise reduction of the per-thread sums within the block.
+ for (size_t s = blockDim.x / 2; s > 0; s >>= 1) {
+ if (tid < s) shared_sum[tid] += shared_sum[tid + s];
+ __syncthreads();
+ }
+
+ if (tid == 0) {
+ pResults[dbIdx] = shared_sum[0];
+ }
+ }
+
+ // Like OneVsManyKernel, but the database entries to compare are given by an index list:
+ // block b compares the query with database thumbnail pIndices[b] and writes the sum of
+ // squared byte differences to pResults[b].
+ // Indices at or beyond maxBufferCapacity are not read; their result is set to 1e100.
+ // The halving reduction only covers every element when blockDim.x is a power of two.
+ __global__ void OneVsListKernel(const uint8_t* pQuery, const uint8_t* pDatabase,
+ const size_t* pIndices, size_t thumbSize, size_t count,
+ double* pResults, size_t maxBufferCapacity)
+ {
+ size_t listIdx = blockIdx.x;
+ if (listIdx >= count) return;
+
+ extern __shared__ double shared_sum_list[];
+ size_t tid = threadIdx.x;
+
+ double localSum = 0;
+ size_t dbIdx = pIndices[listIdx];
+
+ // Check bounds to prevent out-of-bounds access
+ // (the whole block takes this branch together, since dbIdx depends only on blockIdx)
+ if (dbIdx >= maxBufferCapacity) {
+ if (tid == 0) {
+ pResults[listIdx] = 1e100; // Maximum difference (no match)
+ }
+ return;
+ }
+
+ const uint8_t* pTarget = pDatabase + (size_t)dbIdx * thumbSize;
+
+ // Per-thread partial sum over every blockDim.x-th byte.
+ for (size_t i = tid; i < thumbSize; i += blockDim.x) {
+ double diff = (double)pQuery[i] - (double)pTarget[i];
+ localSum += diff * diff;
+ }
+
+ shared_sum_list[tid] = localSum;
+ __syncthreads();
+
+ // Pairwise reduction of the per-thread sums within the block.
+ for (size_t s = blockDim.x / 2; s > 0; s >>= 1) {
+ if (tid < s) shared_sum_list[tid] += shared_sum_list[tid + s];
+ __syncthreads();
+ }
+
+ if (tid == 0) {
+ pResults[listIdx] = shared_sum_list[0];
+ }
+ }
+
+ // --- Implementation ---
+
+ // Probes the CUDA runtime for device 0.
+ // Returns false when no device can be enumerated or its properties cannot be read.
+ // When pInfo is non-null it receives the device name, total global memory, compute
+ // capability and a compatibility flag (compute major version >= 8).
+ // The return value does not depend on that compatibility flag.
+ bool GpuInit(GpuDeviceInfo* pInfo)
+ {
+ AD_DEBUG("GpuInit: Starting\n");
+
+ int numDevices = 0;
+ const cudaError_t countStatus = cudaGetDeviceCount(&numDevices);
+ const bool haveDevice = (countStatus == cudaSuccess) && (numDevices != 0);
+ if (!haveDevice) {
+ AD_DEBUG("GpuInit: cudaGetDeviceCount failed\n");
+ return false;
+ }
+
+ AD_DEBUG("GpuInit: Got device count\n");
+
+ cudaDeviceProp deviceProps;
+ const cudaError_t propStatus = cudaGetDeviceProperties(&deviceProps, 0);
+ if (propStatus != cudaSuccess) {
+ AD_DEBUG("GpuInit: cudaGetDeviceProperties failed\n");
+ return false;
+ }
+
+ AD_DEBUG("GpuInit: Got device properties\n");
+
+ // The caller may pass nullptr when it only wants to know whether a device exists.
+ if (pInfo != nullptr) {
+ strncpy(pInfo->name, deviceProps.name, 256);
+ pInfo->totalGlobalMem = deviceProps.totalGlobalMem;
+ pInfo->computeMajor = deviceProps.major;
+ pInfo->computeMinor = deviceProps.minor;
+ pInfo->isCompatible = (deviceProps.major >= 8);
+ }
+
+ AD_DEBUG("GpuInit: successful\n");
+ return true;
+ }
+
+ // (Re)allocates the global device buffers for `capacity` thumbnails of `thumbSize` bytes.
+ // Any existing buffers are released first. Returns true when capacity or thumbSize is 0
+ // (nothing to allocate), false when the estimated requirement exceeds 80% of the free
+ // device memory or any cudaMalloc fails; on failure all buffers are released again.
+ bool GpuCreateBuffer(size_t capacity, size_t thumbSize)
+ {
+ GpuReleaseBuffer();
+ if (capacity == 0 || thumbSize == 0) return true;
+
+ // Use temporary variable for validation before setting global state
+ size_t testThumbSize = thumbSize;
+
+ size_t freeMem = 0, totalMem = 0;
+ // The return value is not checked; freeMem stays 0 if the call leaves it untouched, which makes the check below fail.
+ cudaMemGetInfo(&freeMem, &totalMem);
+
+ // Thumbnails + one double result and one size_t index per slot + the query thumbnail.
+ size_t requiredMem = capacity * testThumbSize + capacity * sizeof(double) + capacity * sizeof(size_t) + testThumbSize;
+
+ if (requiredMem > (size_t)(freeMem * 0.8))
+ {
+#ifdef AD_LOGGER_ENABLE
+ std::stringstream ss;
+ ss << "GPU: Not enough free VRAM! Required: " << (requiredMem / 1024 / 1024)
+ << " MB, Free: " << (freeMem / 1024 / 1024) << " MB.";
+ AD_LOG(ss.str().c_str());
+#endif
+ return false;
+ }
+
+ // Only set g_thumbSize after memory check passes
+ g_thumbSize = testThumbSize;
+
+ cudaError_t err;
+ err = cudaMalloc(&g_pDeviceThumbnailBuffer, capacity * g_thumbSize);
+ if (err != cudaSuccess) goto error;
+
+ err = cudaMalloc(&g_pQueryBuffer, g_thumbSize);
+ if (err != cudaSuccess) goto error;
+
+ err = cudaMalloc(&g_pResultBuffer, capacity * sizeof(double));
+ if (err != cudaSuccess) goto error;
+
+ err = cudaMalloc(&g_pIndexBuffer, capacity * sizeof(size_t));
+ if (err != cudaSuccess) goto error;
+
+ // Capacity is published only after every allocation succeeded.
+ g_bufferCapacity = capacity;
+#ifdef AD_LOGGER_ENABLE
+ {
+ std::stringstream ss;
+ ss << "GPU: VRAM Allocated. Capacity: " << capacity << " units. Thumbnail Size: " << g_thumbSize << " bytes. Required: " << (requiredMem / 1024 / 1024) << " MB. Free VRAM: " << (freeMem / 1024 / 1024) << " MB.";
+ AD_LOG(ss.str().c_str());
+ }
+#endif
+ return true;
+
+ error:
+ // Frees whatever was allocated before the failing cudaMalloc. g_thumbSize keeps the new value.
+ GpuReleaseBuffer();
+ return false;
+ }
+
+ // Frees every global device buffer that is currently allocated and resets the
+ // pointers and the capacity, so the function can safely be called repeatedly.
+ void GpuReleaseBuffer()
+ {
+ if (g_pDeviceThumbnailBuffer != nullptr) {
+ cudaFree(g_pDeviceThumbnailBuffer);
+ }
+ if (g_pQueryBuffer != nullptr) {
+ cudaFree(g_pQueryBuffer);
+ }
+ if (g_pResultBuffer != nullptr) {
+ cudaFree(g_pResultBuffer);
+ }
+ if (g_pIndexBuffer != nullptr) {
+ cudaFree(g_pIndexBuffer);
+ }
+
+ // Forget the freed allocations.
+ g_pIndexBuffer = nullptr;
+ g_pResultBuffer = nullptr;
+ g_pQueryBuffer = nullptr;
+ g_pDeviceThumbnailBuffer = nullptr;
+ g_bufferCapacity = 0;
+ }
+
+ // Releases the global device buffers and then resets the CUDA device via cudaDeviceReset().
+ void GpuRelease()
+ {
+ GpuReleaseBuffer();
+ cudaDeviceReset();
+ }
+
+ // Copies one thumbnail (g_thumbSize bytes) from host memory into slot `index` of the
+ // device thumbnail buffer.
+ // Returns false when the buffer is not allocated, `index` is outside the allocated
+ // capacity, `pData` is null, or cudaMemcpy fails; true otherwise.
+ // Failures are traced through AD_DEBUG / AD_DEBUG_FMT like the rest of this file. The
+ // success path is silent because this function is called once per image, and the previous
+ // fprintf + fflush on every call was pure overhead.
+ bool GpuUploadThumbnail(size_t index, const uint8_t* pData)
+ {
+ if (!g_pDeviceThumbnailBuffer) {
+ AD_DEBUG("GpuUploadThumbnail: FAILED - buffer not allocated\n");
+ return false;
+ }
+
+ if (index >= g_bufferCapacity) {
+ AD_DEBUG_FMT("GpuUploadThumbnail: FAILED - index %zu exceeds capacity %zu\n",
+ index, g_bufferCapacity);
+ return false;
+ }
+
+ if (pData == nullptr) {
+ AD_DEBUG("GpuUploadThumbnail: FAILED - null data pointer\n");
+ return false;
+ }
+
+ cudaError_t err = cudaMemcpy(g_pDeviceThumbnailBuffer + (size_t)index * g_thumbSize,
+ pData, g_thumbSize, cudaMemcpyHostToDevice);
+ if (err != cudaSuccess) {
+ AD_DEBUG_FMT("GpuUploadThumbnail: FAILED - cudaMemcpy error: %s\n",
+ cudaGetErrorString(err));
+ return false;
+ }
+
+ return true;
+ }
+
+ // Compares the query thumbnail with `count` consecutive database thumbnails starting at
+ // slot `startIdx` and reports those whose sum of squared differences is <= threshold.
+ // pMatchIndices / pMatchDifferences receive at most maxMatches entries (absolute slot
+ // index and raw squared-difference sum); *pMatchCount receives the number written.
+ // Returns false on invalid arguments, when the requested range does not fit into the
+ // allocated buffer, when count exceeds INT_MAX, or on any CUDA error. The outputs are
+ // left untouched in that case.
+ bool GpuCompareOneVsMany(const uint8_t* pQuery, size_t startIdx, size_t count, double threshold,
+ size_t* pMatchIndices, double* pMatchDifferences, size_t* pMatchCount, size_t maxMatches)
+ {
+ // The range check is written without `startIdx + count` so it cannot wrap around and
+ // let an out-of-range request through.
+ if (!g_pDeviceThumbnailBuffer || count == 0 ||
+ startIdx > g_bufferCapacity || count > g_bufferCapacity - startIdx ||
+ pQuery == nullptr || pMatchIndices == nullptr || pMatchDifferences == nullptr ||
+ pMatchCount == nullptr || maxMatches == 0)
+ return false;
+
+ // The grid dimension is passed as int, so reject counts that would not fit before touching the device.
+ if (count > INT_MAX) {
+#ifdef AD_LOGGER_ENABLE
+ AD_LOG("GPU: Count exceeds INT_MAX, cannot launch kernel");
+#endif
+ return false;
+ }
+
+ if (cudaMemcpy(g_pQueryBuffer, pQuery, g_thumbSize, cudaMemcpyHostToDevice) != cudaSuccess) return false;
+
+ // One block per database entry; each block needs one double of shared memory per thread.
+ int threadsPerBlock = 256;
+ OneVsManyKernel<<< (int)count, threadsPerBlock, threadsPerBlock * sizeof(double) >>>(
+ g_pQueryBuffer, g_pDeviceThumbnailBuffer + (size_t)startIdx * g_thumbSize, g_thumbSize, count, g_pResultBuffer);
+
+ if (cudaGetLastError() != cudaSuccess) return false;
+ if (cudaDeviceSynchronize() != cudaSuccess) return false;
+
+ std::vector<double> results(count);
+ if (cudaMemcpy(results.data(), g_pResultBuffer, count * sizeof(double), cudaMemcpyDeviceToHost) != cudaSuccess) return false;
+
+ // Filter on the host; stop as soon as the caller's output arrays are full.
+ size_t found = 0;
+ for (size_t i = 0; i < count && found < maxMatches; ++i) {
+ if (results[i] <= threshold) {
+ pMatchIndices[found] = startIdx + i;
+ pMatchDifferences[found] = results[i];
+ found++;
+ }
+ }
+ *pMatchCount = found;
+ return true;
+ }
+
+ // Compares one query thumbnail against an arbitrary list of slots of the
+ // persistent device thumbnail buffer.
+ //
+ // pQuery            host pointer to g_thumbSize bytes of query data
+ // pIndices, count   host array of buffer slot indices to compare against;
+ //                   count must not exceed g_bufferCapacity
+ // threshold         a slot matches when its kernel result is <= threshold
+ // pMatchIndices     out: the pIndices[] value of every match, in list order
+ // pMatchDifferences out: kernel result of every match
+ // pMatchCount       out: number of entries written (written on success only)
+ // maxMatches        capacity of the two output arrays
+ //
+ // Returns true when the comparison ran, false on invalid arguments or on any
+ // CUDA error. Progress and failures are traced to stderr.
+ // NOTE(review): the kernel is handed g_bufferCapacity, presumably to
+ // bounds-check each index - confirm in OneVsListKernel, since the indices
+ // are not validated on the host.
+ bool GpuCompareOneVsList(const uint8_t* pQuery, const size_t* pIndices, size_t count, double threshold,
+ size_t* pMatchIndices, double* pMatchDifferences, size_t* pMatchCount, size_t maxMatches)
+ {
+     fprintf(stderr, "GpuCompareOneVsList: Starting, count=%zu, threshold=%f\n", count, threshold);
+     fflush(stderr);
+
+     if (!g_pDeviceThumbnailBuffer || count > g_bufferCapacity || count == 0 ||
+         pQuery == nullptr || pIndices == nullptr || pMatchIndices == nullptr ||
+         pMatchDifferences == nullptr || pMatchCount == nullptr || maxMatches == 0) {
+         fprintf(stderr, "GpuCompareOneVsList: Parameter validation FAILED\n");
+         fflush(stderr);
+         return false;
+     }
+
+     // The grid size is passed to the launch as an int (one block per list
+     // entry); check it before spending time on host-to-device copies.
+     if (count > static_cast<size_t>(INT_MAX)) {
+         fprintf(stderr, "GpuCompareOneVsList: Count exceeds INT_MAX\n");
+         fflush(stderr);
+#ifdef AD_LOGGER_ENABLE
+         AD_LOG("GPU: Count exceeds INT_MAX, cannot launch kernel");
+#endif
+         return false;
+     }
+
+     fprintf(stderr, "GpuCompareOneVsList: Copying query to device\n");
+     fflush(stderr);
+     if (cudaMemcpy(g_pQueryBuffer, pQuery, g_thumbSize, cudaMemcpyHostToDevice) != cudaSuccess) {
+         fprintf(stderr, "GpuCompareOneVsList: Query copy FAILED\n");
+         fflush(stderr);
+         return false;
+     }
+
+     fprintf(stderr, "GpuCompareOneVsList: Copying indices to device\n");
+     fflush(stderr);
+     if (cudaMemcpy(g_pIndexBuffer, pIndices, count * sizeof(size_t), cudaMemcpyHostToDevice) != cudaSuccess) {
+         fprintf(stderr, "GpuCompareOneVsList: Indices copy FAILED\n");
+         fflush(stderr);
+         return false;
+     }
+
+     fprintf(stderr, "GpuCompareOneVsList: Launching kernel with count=%zu\n", count);
+     fflush(stderr);
+
+     const int threadsPerBlock = 256;
+     OneVsListKernel<<< (int)count, threadsPerBlock, threadsPerBlock * sizeof(double) >>>(
+         g_pQueryBuffer, g_pDeviceThumbnailBuffer, g_pIndexBuffer, g_thumbSize, count, g_pResultBuffer, g_bufferCapacity);
+
+     fprintf(stderr, "GpuCompareOneVsList: Kernel launched, checking for errors\n");
+     fflush(stderr);
+
+     cudaError_t err = cudaGetLastError();
+     if (err != cudaSuccess) {
+         fprintf(stderr, "GpuCompareOneVsList: Kernel launch error: %s\n", cudaGetErrorString(err));
+         fflush(stderr);
+         return false;
+     }
+
+     fprintf(stderr, "GpuCompareOneVsList: Synchronizing device\n");
+     fflush(stderr);
+     err = cudaDeviceSynchronize();
+     if (err != cudaSuccess) {
+         fprintf(stderr, "GpuCompareOneVsList: Device sync error: %s\n", cudaGetErrorString(err));
+         fflush(stderr);
+         return false;
+     }
+
+     fprintf(stderr, "GpuCompareOneVsList: Copying results from device\n");
+     fflush(stderr);
+
+     // One double per list entry, in the same order as pIndices.
+     std::vector<double> results(count);
+     if (cudaMemcpy(results.data(), g_pResultBuffer, count * sizeof(double), cudaMemcpyDeviceToHost) != cudaSuccess) {
+         fprintf(stderr, "GpuCompareOneVsList: Results copy FAILED\n");
+         fflush(stderr);
+         return false;
+     }
+
+     fprintf(stderr, "GpuCompareOneVsList: Processing results\n");
+     fflush(stderr);
+
+     size_t found = 0;
+     for (size_t i = 0; i < count && found < maxMatches; ++i) {
+         if (results[i] <= threshold) {
+             pMatchIndices[found] = pIndices[i];
+             pMatchDifferences[found] = results[i];
+             ++found;
+         }
+     }
+     *pMatchCount = found;
+
+     fprintf(stderr, "GpuCompareOneVsList: Completed, found=%zu matches\n", found);
+     fflush(stderr);
+
+     return true;
+ }
+
+ // Computes a squared-difference sum of two host buffers of `size` bytes on
+ // the GPU, using temporary device buffers that are allocated and freed on
+ // every call.
+ //
+ // Returns the value produced by SquaredSumKernel on success, and 0 for
+ // size == 0. Returns the sentinel 1e10 on ANY failure (null input, no CUDA
+ // device, allocation/copy/launch/sync error). Previously the failures that
+ // happened after the device check returned 0, which callers cannot tell apart
+ // from "the two images are identical".
+ double GpuCompareSquaredSum(const uint8_t* pSrc1, const uint8_t* pSrc2, size_t size)
+ {
+     const double kFailure = 1e10;
+
+     fprintf(stderr, "GpuCompareSquaredSum: Starting, size=%zu\n", size);
+     fflush(stderr);
+
+     if (pSrc1 == nullptr || pSrc2 == nullptr) {
+         fprintf(stderr, "GpuCompareSquaredSum: Null input pointers\n");
+         fflush(stderr);
+         return kFailure;
+     }
+
+     // Check CUDA device availability
+     int deviceCount = 0;
+     cudaError_t testErr = cudaGetDeviceCount(&deviceCount);
+     if (testErr != cudaSuccess || deviceCount == 0) {
+         fprintf(stderr, "GpuCompareSquaredSum: No CUDA devices available\n");
+         fflush(stderr);
+         return kFailure;
+     }
+
+     // Two empty buffers differ by nothing; a zero-block launch would be
+     // rejected by CUDA, so answer directly.
+     if (size == 0)
+         return 0.0;
+
+     // Everything the cleanup label touches is declared before the first goto.
+     uint8_t *d_1 = nullptr, *d_2 = nullptr;
+     double *d_r = nullptr, h_r = 0;
+     size_t numBlocks = 0;
+     double result = kFailure; // stays at the sentinel unless every step succeeds
+     cudaError_t err = cudaSuccess;
+
+     fprintf(stderr, "GpuCompareSquaredSum: Allocating device memory\n");
+     fflush(stderr);
+
+     err = cudaMalloc(&d_1, size);
+     if (err != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: cudaMalloc d_1 failed: %s\n", cudaGetErrorString(err));
+         fflush(stderr);
+         goto cleanup;
+     }
+
+     err = cudaMalloc(&d_2, size);
+     if (err != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: cudaMalloc d_2 failed: %s\n", cudaGetErrorString(err));
+         fflush(stderr);
+         goto cleanup;
+     }
+
+     err = cudaMalloc(&d_r, sizeof(double));
+     if (err != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: cudaMalloc d_r failed: %s\n", cudaGetErrorString(err));
+         fflush(stderr);
+         goto cleanup;
+     }
+
+     fprintf(stderr, "GpuCompareSquaredSum: Copying data to device\n");
+     fflush(stderr);
+
+     if (cudaMemcpy(d_1, pSrc1, size, cudaMemcpyHostToDevice) != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: cudaMemcpy d_1 failed\n");
+         fflush(stderr);
+         goto cleanup;
+     }
+     if (cudaMemcpy(d_2, pSrc2, size, cudaMemcpyHostToDevice) != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: cudaMemcpy d_2 failed\n");
+         fflush(stderr);
+         goto cleanup;
+     }
+     // The accumulator starts at zero before the kernel runs.
+     if (cudaMemset(d_r, 0, sizeof(double)) != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: cudaMemset d_r failed\n");
+         fflush(stderr);
+         goto cleanup;
+     }
+
+     // Blocks of 256 threads, rounded up to cover `size`; the grid size is
+     // passed to the launch as an int.
+     numBlocks = (size + 255) / 256;
+     if (numBlocks > static_cast<size_t>(INT_MAX)) {
+         fprintf(stderr, "GpuCompareSquaredSum: Block count exceeds INT_MAX\n");
+         fflush(stderr);
+#ifdef AD_LOGGER_ENABLE
+         AD_LOG("GPU: Block count exceeds INT_MAX, cannot launch kernel");
+#endif
+         goto cleanup;
+     }
+
+     fprintf(stderr, "GpuCompareSquaredSum: Launching kernel with %zu blocks\n", numBlocks);
+     fflush(stderr);
+
+     SquaredSumKernel<<< (int)numBlocks, 256, 256 * sizeof(double) >>>(d_1, d_2, size, d_r);
+
+     err = cudaGetLastError();
+     if (err != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: Kernel launch failed: %s\n", cudaGetErrorString(err));
+         fflush(stderr);
+         goto cleanup;
+     }
+
+     err = cudaDeviceSynchronize();
+     if (err != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: Device synchronize failed: %s\n", cudaGetErrorString(err));
+         fflush(stderr);
+         goto cleanup;
+     }
+
+     fprintf(stderr, "GpuCompareSquaredSum: Copying result from device\n");
+     fflush(stderr);
+
+     err = cudaMemcpy(&h_r, d_r, sizeof(double), cudaMemcpyDeviceToHost);
+     if (err != cudaSuccess) {
+         fprintf(stderr, "GpuCompareSquaredSum: cudaMemcpy result failed: %s\n", cudaGetErrorString(err));
+         fflush(stderr);
+         goto cleanup;
+     }
+
+     fprintf(stderr, "GpuCompareSquaredSum: Success, result=%f\n", h_r);
+     fflush(stderr);
+     result = h_r;
+
+ cleanup:
+     if (d_1) cudaFree(d_1);
+     if (d_2) cudaFree(d_2);
+     if (d_r) cudaFree(d_r);
+     return result;
+ }
+
+ // All-vs-all comparison: uploads every thumbnail and CRC to the device in one
+ // go, runs AllVsAllKernel once and hands the collected matches to `callback`.
+ //
+ // Returns false on invalid arguments, when the estimated VRAM need exceeds
+ // 90% of the free VRAM, or on any CUDA failure - including a failed readback
+ // of the match count or of the matches themselves, which used to be ignored
+ // or reported as success. All device memory allocated here is released before
+ // returning. Returns true otherwise.
+ //
+ // NOTE: the device result buffer holds at most maxMatchesPerBatch entries. If
+ // the kernel reports more matches than that, only maxMatchesPerBatch of them
+ // are delivered; this is logged as a warning and the function still returns
+ // true.
+ bool GpuCompareAllVsAll(
+ const uint8_t* allThumbnails, // All thumbnails, in host RAM
+ const uint64_t* allCrcArray, // CRC32c of each image
+ size_t count, // Number of images
+ size_t thumbSize, // Size of one thumbnail (1024)
+ double threshold, // Squared-difference threshold
+ double maxDifference, // Maximum difference, used for normalization
+ double addDiffForCrcMismatch, // Amount added when the CRCs differ
+ void* callbackContext, // Context forwarded to the callback
+ GpuMatchCallback callback, // Callback for streaming processing
+ size_t maxMatchesPerBatch) // Maximum number of matches per call
+ {
+     AD_DEBUG("GpuCompareAllVsAll: Starting\n");
+
+     // Clear any CUDA error left pending by earlier calls before we start.
+     cudaGetLastError();
+
+     if (!allThumbnails || !allCrcArray || count == 0 || thumbSize == 0 || !callback) {
+         AD_DEBUG("GpuCompareAllVsAll: Invalid parameters\n");
+         return false;
+     }
+
+     size_t totalPairs = count * (count - 1) / 2;
+     AD_DEBUG_FMT("GpuCompareAllVsAll: Comparing %zu images, %zu pairs\n", count, totalPairs);
+
+     uint8_t* d_thumbnails = nullptr;
+     uint64_t* d_crcArray = nullptr;
+     Match* d_results = nullptr;
+     size_t* d_matchCount = nullptr;
+
+     // Single cleanup path for every exit; pointers that were never allocated
+     // are still null, and cudaFree(nullptr) is a no-op.
+     auto releaseDeviceMemory = [&]() {
+         cudaFree(d_thumbnails);
+         cudaFree(d_crcArray);
+         cudaFree(d_results);
+         cudaFree(d_matchCount);
+     };
+
+     cudaError_t err;
+
+     // 1. Allocate VRAM for the thumbnails
+     AD_DEBUG("GpuCompareAllVsAll: Allocating VRAM for thumbnails\n");
+
+     // Check the available VRAM before allocating. If the query itself fails,
+     // freeMem stays 0 and the request is rejected below.
+     size_t freeMem = 0, totalMem = 0;
+     cudaMemGetInfo(&freeMem, &totalMem);
+     size_t requiredMem = count * thumbSize + count * sizeof(uint64_t) + maxMatchesPerBatch * sizeof(Match);
+     AD_DEBUG_FMT("GpuCompareAllVsAll: VRAM free=%zu MB, total=%zu MB, required=%zu MB\n",
+         freeMem / 1024 / 1024, totalMem / 1024 / 1024, requiredMem / 1024 / 1024);
+
+     if (requiredMem > freeMem * 9 / 10) { // Use at most 90% of the free VRAM
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Not enough VRAM (need %zu MB, have %zu MB)\n",
+             requiredMem / 1024 / 1024, freeMem / 1024 / 1024);
+         return false;
+     }
+
+     err = cudaMalloc(&d_thumbnails, count * thumbSize);
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Failed to allocate thumbnails VRAM: %s\n", cudaGetErrorString(err));
+         cudaGetLastError(); // Clear error state
+         return false;
+     }
+
+     // 2. Allocate VRAM for the CRC array
+     AD_DEBUG("GpuCompareAllVsAll: Allocating VRAM for CRC array\n");
+     err = cudaMalloc(&d_crcArray, count * sizeof(uint64_t));
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Failed to allocate CRC VRAM: %s\n", cudaGetErrorString(err));
+         releaseDeviceMemory();
+         return false;
+     }
+
+     // 3. Allocate VRAM for the results (batch buffer)
+     AD_DEBUG("GpuCompareAllVsAll: Allocating VRAM for results\n");
+     err = cudaMalloc(&d_results, maxMatchesPerBatch * sizeof(Match));
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Failed to allocate results VRAM: %s\n", cudaGetErrorString(err));
+         releaseDeviceMemory();
+         return false;
+     }
+
+     // 4. Allocate VRAM for the match counter
+     err = cudaMalloc(&d_matchCount, sizeof(size_t));
+     if (err != cudaSuccess) {
+         AD_DEBUG("GpuCompareAllVsAll: Failed to allocate counter VRAM\n");
+         releaseDeviceMemory();
+         return false;
+     }
+
+     // 5. Upload all thumbnails to VRAM
+     AD_DEBUG("GpuCompareAllVsAll: Uploading all thumbnails to VRAM\n");
+     err = cudaMemcpy(d_thumbnails, allThumbnails, count * thumbSize, cudaMemcpyHostToDevice);
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Upload thumbnails failed: %s\n", cudaGetErrorString(err));
+         releaseDeviceMemory();
+         return false;
+     }
+     AD_DEBUG("GpuCompareAllVsAll: Upload thumbnails complete\n");
+
+     // 6. Upload the CRC array
+     AD_DEBUG("GpuCompareAllVsAll: Uploading CRC array to VRAM\n");
+     err = cudaMemcpy(d_crcArray, allCrcArray, count * sizeof(uint64_t), cudaMemcpyHostToDevice);
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Upload CRC failed: %s\n", cudaGetErrorString(err));
+         releaseDeviceMemory();
+         return false;
+     }
+     AD_DEBUG("GpuCompareAllVsAll: Upload CRC complete\n");
+
+     // 7. Initialize the match counter to zero
+     size_t h_matchCount = 0;
+     err = cudaMemcpy(d_matchCount, &h_matchCount, sizeof(size_t), cudaMemcpyHostToDevice);
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Counter init failed: %s\n", cudaGetErrorString(err));
+         releaseDeviceMemory();
+         return false;
+     }
+
+     // 8. Launch the kernel: one block per image, capped at 65535 blocks
+     AD_DEBUG("GpuCompareAllVsAll: Launching kernel\n");
+
+     int threadsPerBlock = 256;
+     size_t blocks = count;
+     if (blocks > 65535) blocks = 65535;
+     if (blocks == 0) blocks = 1;
+
+     AD_DEBUG_FMT("GpuCompareAllVsAll: Launching %zu blocks, %d threads/block\n", blocks, threadsPerBlock);
+
+     AllVsAllKernel<<<(int)blocks, threadsPerBlock, thumbSize>>>(
+         d_thumbnails, d_crcArray, thumbSize, count, threshold, maxDifference, addDiffForCrcMismatch,
+         maxMatchesPerBatch, d_results, d_matchCount);
+
+     err = cudaGetLastError();
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Kernel launch failed: %s\n", cudaGetErrorString(err));
+         releaseDeviceMemory();
+         return false;
+     }
+
+     // 9. Wait for the kernel to finish
+     AD_DEBUG("GpuCompareAllVsAll: Synchronizing\n");
+     err = cudaDeviceSynchronize();
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Sync failed: %s\n", cudaGetErrorString(err));
+         releaseDeviceMemory();
+         return false;
+     }
+     AD_DEBUG("GpuCompareAllVsAll: Kernel complete\n");
+
+     // 10. Read back the total match count. A failed copy would leave the count
+     // at 0 and make the run look like "no duplicates", so treat it as an error.
+     err = cudaMemcpy(&h_matchCount, d_matchCount, sizeof(size_t), cudaMemcpyDeviceToHost);
+     if (err != cudaSuccess) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: Match count readback failed: %s\n", cudaGetErrorString(err));
+         releaseDeviceMemory();
+         return false;
+     }
+     AD_DEBUG_FMT("GpuCompareAllVsAll: Found %zu total matches (buffer capacity: %zu)\n", h_matchCount, maxMatchesPerBatch);
+
+     // Never read more than the result buffer can hold
+     size_t matchesToRead = (h_matchCount < maxMatchesPerBatch) ? h_matchCount : maxMatchesPerBatch;
+     if (h_matchCount > maxMatchesPerBatch) {
+         AD_DEBUG_FMT("GpuCompareAllVsAll: WARNING! Truncated from %zu to %zu matches\n", h_matchCount, maxMatchesPerBatch);
+     }
+
+     // 11. Streaming readback - read in batches and invoke the callback
+     bool readbackOk = true;
+     if (matchesToRead > 0) {
+         std::vector<Match> h_batch(maxMatchesPerBatch);
+         size_t remaining = matchesToRead;
+         size_t offset = 0;
+
+         while (remaining > 0) {
+             size_t batchSize = (remaining < maxMatchesPerBatch) ? remaining : maxMatchesPerBatch;
+             AD_DEBUG_FMT("GpuCompareAllVsAll: Reading batch %zu matches (offset %zu)\n", batchSize, offset);
+
+             err = cudaMemcpy(h_batch.data(), d_results + offset, batchSize * sizeof(Match), cudaMemcpyDeviceToHost);
+             if (err != cudaSuccess) {
+                 AD_DEBUG_FMT("GpuCompareAllVsAll: Readback failed: %s\n", cudaGetErrorString(err));
+                 readbackOk = false; // matches were lost: report failure to the caller
+                 break;
+             }
+
+             // Hand the batch to the caller
+             callback(h_batch.data(), batchSize, callbackContext);
+
+             remaining -= batchSize;
+             offset += batchSize;
+         }
+         if (readbackOk) {
+             AD_DEBUG("GpuCompareAllVsAll: All batches processed\n");
+         }
+     }
+
+     // 12. Release VRAM
+     releaseDeviceMemory();
+
+     if (!readbackOk)
+         return false;
+
+     AD_DEBUG("GpuCompareAllVsAll: Complete\n");
+     return true;
+ }
+}
diff --git a/src/AntiDupl/adGPU.h b/src/AntiDupl/adGPU.h
new file mode 100644
index 00000000..caba22aa
--- /dev/null
+++ b/src/AntiDupl/adGPU.h
@@ -0,0 +1,83 @@
+/*
+* AntiDuplPlus Program (http://github.com/Sucotasch/AntiDuplPlus).
+*
+* Copyright (c) 2023-2026.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to deal
+* in the Software without restriction, including without limitation the rights
+* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+#ifndef __adGPU_h__
+#define __adGPU_h__
+
+#include "adConfig.h"
+
+namespace ad
+{
+ // Description of the GPU, filled in through GpuInit(). TGpuManager treats the
+ // GPU as usable only when GpuInit() succeeds and isCompatible is set.
+ // NOTE(review): field meanings below are inferred from their names - confirm
+ // against GpuInit().
+ struct GpuDeviceInfo
+ {
+ char name[256]; // device name
+ size_t totalGlobalMem; // presumably total device memory in bytes
+ int computeMajor; // presumably compute capability, major part
+ int computeMinor; // presumably compute capability, minor part
+ bool isCompatible;
+ };
+
+ // One match record of the all-vs-all comparison; arrays of these are what
+ // GpuCompareAllVsAll passes to GpuMatchCallback.
+ // NOTE(review): image1/image2 are presumably indices into the thumbnail array
+ // given to GpuCompareAllVsAll - confirm in the kernel.
+ struct Match {
+ uint32_t image1;
+ uint32_t image2;
+ float difference;
+ };
+
+ bool GpuInit(GpuDeviceInfo* pInfo);
+ // Releases the persistent buffers and resets the CUDA device.
+ void GpuRelease();
+
+ // Persistent Buffer Management
+ bool GpuCreateBuffer(size_t capacity, size_t thumbSize);
+ void GpuReleaseBuffer();
+ // Copies one thumbnail from host memory into slot `index` of the buffer;
+ // returns false if the buffer is missing, index is out of range, pData is
+ // null or the copy fails.
+ bool GpuUploadThumbnail(size_t index, const uint8_t* pData);
+
+ // Batch Comparison: Compare one query thumbnail against a range of thumbnails in the buffer
+ // Returns true on success and false on invalid arguments or a CUDA error.
+ // The number of matches found (difference <= threshold) is written to
+ // *pMatchCount; at most maxMatches entries are stored in the output arrays.
+ bool GpuCompareOneVsMany(const uint8_t* pQuery, size_t startIdx, size_t count, double threshold,
+ size_t* pMatchIndices, double* pMatchDifferences, size_t* pMatchCount, size_t maxMatches);
+
+ // Compare one query thumbnail against a list of thumbnail indices in the buffer
+ // Same return value and output conventions as GpuCompareOneVsMany.
+ bool GpuCompareOneVsList(const uint8_t* pQuery, const size_t* pIndices, size_t count, double threshold,
+ size_t* pMatchIndices, double* pMatchDifferences, size_t* pMatchCount, size_t maxMatches);
+
+ // Single comparison of two host buffers of `size` bytes.
+ // NOTE: the implementation allocates and frees temporary device buffers on
+ // every call, so this is not a cheap per-pair primitive. It returns 1e10 when
+ // an input pointer is null or no CUDA device is available.
+ double GpuCompareSquaredSum(const uint8_t* pSrc1, const uint8_t* pSrc2, size_t size);
+
+ // NEW: AllVsAll comparison with a streaming callback.
+ // `batch` points to an array of `count` Match records; `context` is the
+ // callbackContext value given to GpuCompareAllVsAll.
+ typedef void (*GpuMatchCallback)(const void* batch, size_t count, void* context);
+
+ bool GpuCompareAllVsAll(
+ const uint8_t* allThumbnails, // All thumbnails, in host RAM
+ const uint64_t* allCrcArray, // CRC32c of each image
+ size_t count, // Number of images
+ size_t thumbSize, // Size of one thumbnail (1024)
+ double threshold, // Squared-difference threshold
+ double maxDifference, // Maximum difference, used for normalization
+ double addDiffForCrcMismatch, // Amount added when the CRCs differ
+ void* callbackContext, // Context forwarded to the callback
+ GpuMatchCallback callback, // Callback for streaming processing
+ size_t maxMatchesPerBatch); // Maximum number of matches per call
+}
+
+#endif//__adGPU_h__
diff --git a/src/AntiDupl/adGPUManager.cpp b/src/AntiDupl/adGPUManager.cpp
new file mode 100644
index 00000000..c1610501
--- /dev/null
+++ b/src/AntiDupl/adGPUManager.cpp
@@ -0,0 +1,106 @@
+/*
+* AntiDuplPlus Program (http://github.com/Sucotasch/AntiDuplPlus).
+*
+* Copyright (c) 2023-2026.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to deal
+* in the Software without restriction, including without limitation the rights
+* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+#include "adGPUManager.h"
+#include "adLogger.h"
+#include
+#include
+
+#define AD_DEBUG(msg) OutputDebugStringA(msg)
+
+namespace ad
+{
+    // Probes the GPU through GpuInit(). The manager is marked available only
+    // when GpuInit() succeeds and reports a compatible device; any exception
+    // thrown during the probe leaves the manager unavailable instead of
+    // propagating out of the constructor.
+    TGpuManager::TGpuManager()
+        : m_available(false), m_capacity(0), m_thumbSize(0)
+    {
+        AD_DEBUG("TGpuManager: Constructor starting\n");
+        memset(&m_deviceInfo, 0, sizeof(m_deviceInfo));
+        try
+        {
+            AD_DEBUG("TGpuManager: Calling GpuInit\n");
+            if (GpuInit(&m_deviceInfo))
+            {
+                AD_DEBUG("TGpuManager: GpuInit succeeded, checking compatibility\n");
+                if (m_deviceInfo.isCompatible)
+                {
+                    m_available = true;
+                    AD_DEBUG("TGpuManager: GPU available and compatible\n");
+                }
+                else
+                {
+                    AD_DEBUG("TGpuManager: GPU not compatible\n");
+                }
+            }
+            else
+            {
+                AD_DEBUG("TGpuManager: GpuInit failed\n");
+            }
+        }
+        catch (const std::exception&) // exception object itself is not inspected
+        {
+            AD_DEBUG("TGpuManager: Exception caught\n");
+            m_available = false;
+#ifdef AD_LOGGER_ENABLE
+            AD_LOG("GPU: Exception during initialization.");
+#endif
+        }
+        catch (...)
+        {
+            AD_DEBUG("TGpuManager: Unknown exception caught\n");
+            m_available = false;
+#ifdef AD_LOGGER_ENABLE
+            AD_LOG("GPU: Unknown exception during initialization.");
+#endif
+        }
+        AD_DEBUG("TGpuManager: Constructor finished\n");
+    }
+
+    // Releases the GPU (buffers and device) if it was successfully initialized.
+    TGpuManager::~TGpuManager()
+    {
+        if (m_available)
+        {
+            std::lock_guard<std::mutex> lock(m_mutex);
+            GpuRelease();
+        }
+    }
+
+    // Makes sure the persistent device buffer can hold `required` thumbnails of
+    // `thumbSize` bytes each. Returns true when the current buffer already
+    // fits or a new one was created; false when the GPU is unavailable or the
+    // allocation failed.
+    // NOTE(review): a reallocation goes through GpuCreateBuffer(), which
+    // presumably discards previously uploaded thumbnails - confirm.
+    bool TGpuManager::EnsureCapacity(size_t required, size_t thumbSize)
+    {
+        if (!m_available) return false;
+
+        std::lock_guard<std::mutex> lock(m_mutex);
+        if (required <= m_capacity && thumbSize == m_thumbSize && m_capacity > 0) return true;
+
+        // Reallocate if size changed or capacity is too small (20% headroom)
+        size_t newCapacity = (size_t)(required * 1.2);
+        if (newCapacity < 1024) newCapacity = 1024; // Minimum buffer to avoid frequent reallocs
+
+        if (GpuCreateBuffer(newCapacity, thumbSize))
+        {
+            m_capacity = newCapacity;
+            m_thumbSize = thumbSize;
+            return true;
+        }
+
+        // The state of the old buffer after a failed GpuCreateBuffer() is not
+        // known here, so stop advertising the old capacity; otherwise a later,
+        // smaller request would return true without a usable buffer.
+        m_capacity = 0;
+        m_thumbSize = 0;
+        return false;
+    }
+}
diff --git a/src/AntiDupl/adGPUManager.h b/src/AntiDupl/adGPUManager.h
new file mode 100644
index 00000000..aee97d73
--- /dev/null
+++ b/src/AntiDupl/adGPUManager.h
@@ -0,0 +1,101 @@
+/*
+* AntiDuplPlus Program (http://github.com/Sucotasch/AntiDuplPlus).
+*
+* Copyright (c) 2023-2026.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to deal
+* in the Software without restriction, including without limitation the rights
+* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+#ifndef __adGPUManager_h__
+#define __adGPUManager_h__
+
+#include "adGPU.h"
+#include <mutex>
+
+namespace ad
+{
+    // Thread-safe front end to the free GPU functions declared in adGPU.h.
+    // Every GPU call is serialized through m_mutex, and every operation is a
+    // no-op returning false (or nothing) when the GPU is not available.
+    class TGpuManager
+    {
+    public:
+        TGpuManager();
+        ~TGpuManager();
+
+        bool IsAvailable() const { return m_available; }
+        const GpuDeviceInfo& DeviceInfo() const { return m_deviceInfo; }
+
+        // Uploads one thumbnail into slot `index` of the device buffer.
+        bool UploadThumbnail(size_t index, const uint8_t* pData) {
+            if (!m_available) return false;
+            std::lock_guard<std::mutex> lock(m_mutex);
+            return GpuUploadThumbnail(index, pData);
+        }
+
+        // Locked wrapper around GpuCompareOneVsMany (contiguous slot range).
+        bool CompareOneVsMany(const uint8_t* pQuery, size_t startIdx, size_t count, double threshold,
+            size_t* pMatchIndices, double* pMatchDifferences, size_t* pMatchCount, size_t maxMatches) {
+            if (!m_available) return false;
+            std::lock_guard<std::mutex> lock(m_mutex);
+            return GpuCompareOneVsMany(pQuery, startIdx, count, threshold,
+                pMatchIndices, pMatchDifferences, pMatchCount, maxMatches);
+        }
+
+        // Locked wrapper around GpuCompareOneVsList (explicit slot list).
+        bool CompareOneVsList(const uint8_t* pQuery, const size_t* pIndices, size_t count, double threshold,
+            size_t* pMatchIndices, double* pMatchDifferences, size_t* pMatchCount, size_t maxMatches) {
+            if (!m_available) return false;
+            std::lock_guard<std::mutex> lock(m_mutex);
+            return GpuCompareOneVsList(pQuery, pIndices, count, threshold,
+                pMatchIndices, pMatchDifferences, pMatchCount, maxMatches);
+        }
+
+        // Drops the device buffer and recreates it with the current capacity
+        // and thumbnail size. If the recreation fails, the recorded capacity is
+        // reset to zero so that the next EnsureCapacity() call allocates again
+        // instead of reporting a buffer that no longer exists.
+        void ClearBuffer() {
+            if (!m_available) return;
+            std::lock_guard<std::mutex> lock(m_mutex);
+            GpuReleaseBuffer();
+            if (!GpuCreateBuffer(m_capacity, m_thumbSize)) {
+                m_capacity = 0;
+                m_thumbSize = 0;
+            }
+        }
+
+        bool EnsureCapacity(size_t required, size_t thumbSize);
+
+        // NEW: AllVsAll comparison with a streaming callback.
+        // NOTE: m_mutex (a non-recursive std::mutex) is held while `callback`
+        // runs, so the callback must not call back into this manager.
+        bool CompareAllVsAll(
+            const uint8_t* allThumbnails,
+            const uint64_t* allCrcArray,
+            size_t count,
+            size_t thumbSize,
+            double threshold,
+            double maxDifference,
+            double addDiffForCrcMismatch,
+            void* callbackContext,
+            GpuMatchCallback callback,
+            size_t maxMatchesPerBatch) {
+            if (!m_available) return false;
+            std::lock_guard<std::mutex> lock(m_mutex);
+            return GpuCompareAllVsAll(allThumbnails, allCrcArray, count, thumbSize,
+                threshold, maxDifference, addDiffForCrcMismatch,
+                callbackContext, callback, maxMatchesPerBatch);
+        }
+
+    private:
+        bool m_available;           // set once in the constructor
+        GpuDeviceInfo m_deviceInfo; // filled in by GpuInit()
+        size_t m_capacity;          // slots last requested from GpuCreateBuffer, 0 if none
+        size_t m_thumbSize;         // bytes per slot last requested from GpuCreateBuffer
+        mutable std::mutex m_mutex; // serializes all GPU calls
+    };
+}
+
+#endif//__adGPUManager_h__
diff --git a/src/AntiDupl/adIO.h b/src/AntiDupl/adIO.h
index 5f8d88a4..2d40898f 100644
--- a/src/AntiDupl/adIO.h
+++ b/src/AntiDupl/adIO.h
@@ -24,6 +24,7 @@
#ifndef __adIO_h__
#define __adIO_h__
+#include
#include "adConfig.h"
namespace ad
@@ -106,7 +107,7 @@ namespace ad
{
std::stringstream ss;
ss << "Can't read " << sizeof(T) << " bytes from stream!";
- throw std::exception(ss.str().c_str());
+ throw std::runtime_error(ss.str());
}
return (T)0;
}
@@ -155,7 +156,7 @@ namespace ad
{
std::stringstream ss;
ss << "Can't read " << size << " bytes from stream!";
- throw std::exception(ss.str().c_str());
+ throw std::runtime_error(ss.str());
}
}
}
@@ -167,7 +168,7 @@ namespace ad
LARGE_INTEGER shift;
shift.QuadPart = size;
if(pStream->Seek(shift, STREAM_SEEK_CUR, NULL) != S_OK)
- throw std::exception("Can't read stub buffer!");
+ throw std::runtime_error("Can't read stub buffer!");
}
}
@@ -176,7 +177,7 @@ namespace ad
ULARGE_INTEGER position;
LARGE_INTEGER shift = {0};
if(pStream->Seek(shift, STREAM_SEEK_CUR, &position) != S_OK)
- throw std::exception("Can't get current stream position!");
+ throw std::runtime_error("Can't get current stream position!");
return position.QuadPart;
}
@@ -185,7 +186,7 @@ namespace ad
LARGE_INTEGER shift;
shift.QuadPart = position;
if(pStream->Seek(shift, STREAM_SEEK_CUR, NULL) != S_OK)
- throw std::exception("Can't get current stream position!");
+ throw std::runtime_error("Can't get current stream position!");
}
}
diff --git a/src/AntiDupl/adImage.cpp b/src/AntiDupl/adImage.cpp
index a409879e..8d8a2ed7 100644
--- a/src/AntiDupl/adImage.cpp
+++ b/src/AntiDupl/adImage.cpp
@@ -112,7 +112,7 @@ namespace ad
return THeif::Load(hGlobal);
#ifdef AD_TURBO_JPEG_ENABLE
if (pOptions->advanced.useLibJpegTurbo && TTurboJpeg::Supported(hGlobal))
- return TTurboJpeg::Load(hGlobal);
+ return TTurboJpeg::Load(hGlobal, pOptions->advanced.reducedImageSize);
#endif//AD_TURBO_JPEG_ENABLE
else
return TGdiplus::Load(hGlobal);
diff --git a/src/AntiDupl/adImageComparer.cpp b/src/AntiDupl/adImageComparer.cpp
index 5dfd6e8b..2cf8321e 100644
--- a/src/AntiDupl/adImageComparer.cpp
+++ b/src/AntiDupl/adImageComparer.cpp
@@ -31,6 +31,10 @@
#include "adResultStorage.h"
#include "adImageComparer.h"
#include "adImageDataStorage.h"
+#include "adGPUManager.h"
+#include
+
+#define AD_DEBUG(msg) OutputDebugStringA(msg)
namespace ad
{
@@ -39,7 +43,8 @@ namespace ad
//-------------------------------------------------------------------------
TImageComparer::TImageComparer(TEngine *pEngine)
- :m_pOptions(pEngine->Options()),
+ :m_pEngine(pEngine),
+ m_pOptions(pEngine->Options()),
m_pResult(pEngine->Result()),
m_pTransformedImageData(NULL),
m_pBuffer(NULL),
@@ -116,25 +121,111 @@ namespace ad
// pTransformed - трансформированное, если применяется трансформация или то же что и оригинальное.
void TImageComparer::CompareWithSet(const Set &set, TImageDataPtr pOriginal, TImageDataPtr pTransformed, adTransformType transform)
{
- double difference;
- // Если картинка не в проверенных
- if(!pTransformed->valid)
+ TGpuManager* pGpu = m_pEngine->GpuManager();
+ bool gpuEligible = (pGpu && pGpu->IsAvailable() &&
+ m_pOptions->compare.algorithmComparing == AD_COMPARING_SQUARED_SUM &&
+ m_pOptions->advanced.ignoreFrameWidth == 0);
+
+ if (gpuEligible)
+ {
+ if(!pTransformed->valid)
+ CompareWithSetGPU(set.valid, pOriginal, pTransformed, transform);
+ CompareWithSetGPU(set.other, pOriginal, pTransformed, transform);
+ }
+ else
{
- // Сравниваем с набором проверенных
- for(TImageDataPtrList::const_iterator i = set.valid.begin(); i != set.valid.end(); ++i)
+ double difference;
+ // Если картинка не в проверенных
+ if(!pTransformed->valid)
+ {
+ // Сравниваем с набором проверенных
+ for(TImageDataPtrList::const_iterator i = set.valid.begin(); i != set.valid.end(); ++i)
+ {
+ if(IsDuplPair(pTransformed, *i, &difference))
+ m_pResult->AddDuplImagePair(pOriginal, *i, difference, transform);
+ }
+ }
+ // Сравниваем с набором остальных
+ for(TImageDataPtrList::const_iterator i = set.other.begin(); i != set.other.end(); ++i)
{
if(IsDuplPair(pTransformed, *i, &difference))
m_pResult->AddDuplImagePair(pOriginal, *i, difference, transform);
}
}
- // Сравниваем с набором остальных
- for(TImageDataPtrList::const_iterator i = set.other.begin(); i != set.other.end(); ++i)
- {
- if(IsDuplPair(pTransformed, *i, &difference))
- m_pResult->AddDuplImagePair(pOriginal, *i, difference, transform);
- }
}
+ // GPU counterpart of the per-list loop in CompareWithSet: compares
+ // pTransformed against every image of `list` and records each duplicate
+ // pair (pOriginal, candidate) in m_pResult.
+ //
+ // Candidates are first filtered on the CPU with the type/size/ratio/folder/
+ // search-path options, then sent to the GPU in batches of up to 1024 slot
+ // indices (TImageData::globalIdx). If a GPU batch fails, that batch is
+ // compared on the CPU with IsDuplPair() instead of being dropped, so a GPU
+ // error can no longer make duplicates go silently missing.
+ void TImageComparer::CompareWithSetGPU(const TImageDataPtrList &list, TImageDataPtr pOriginal, TImageDataPtr pTransformed, adTransformType transform)
+ {
+     AD_DEBUG("CompareWithSetGPU: Starting\n");
+
+     if (list.empty()) return;
+
+     TGpuManager* pGpu = m_pEngine->GpuManager();
+     double threshold = (double)m_mainThreshold;
+
+     AD_DEBUG("CompareWithSetGPU: Gathering indices\n");
+
+     // Parallel arrays: indices[k] is the device slot of the image ptrs[k].
+     std::vector<size_t> indices;
+     std::vector<TImageDataPtr> ptrs;
+     indices.reserve(list.size());
+     ptrs.reserve(list.size());
+
+     for (TImageDataPtrList::const_iterator i = list.begin(); i != list.end(); ++i)
+     {
+         TImageDataPtr pSecond = *i;
+         // Basic CPU-side pre-checks to avoid unnecessary GPU work
+         if(m_pOptions->compare.typeControl == TRUE && pTransformed->type != pSecond->type) continue;
+         if(m_pOptions->compare.sizeControl == TRUE && (pTransformed->height != pSecond->height || pTransformed->width != pSecond->width)) continue;
+         if(m_pOptions->compare.ratioControl == TRUE && Simd::Square(pTransformed->ratio - pSecond->ratio) > Simd::Square(RATIO_THRESHOLD_DIFFERENCE)) continue;
+         if(m_pOptions->compare.compareInsideOneFolder == FALSE && TPath::EqualByDirectory(pTransformed->path, pSecond->path)) continue;
+         if(m_pOptions->compare.compareInsideOneSearchPath == FALSE && pTransformed->index == pSecond->index) continue;
+
+         indices.push_back(pSecond->globalIdx);
+         ptrs.push_back(pSecond);
+     }
+
+     AD_DEBUG("CompareWithSetGPU: Processing batches\n");
+
+     if (indices.empty()) return;
+
+     const size_t batchSize = 1024; // Process in chunks to manage VRAM/latency
+
+     for (size_t start = 0; start < indices.size(); start += batchSize)
+     {
+         size_t count = std::min(batchSize, indices.size() - start);
+
+         std::vector<size_t> matchIndices(count);
+         std::vector<double> matchDiffs(count);
+         size_t matchCount = 0;
+
+         if (pGpu->CompareOneVsList(pTransformed->data->main, &indices[start], count, threshold,
+             matchIndices.data(), matchDiffs.data(), &matchCount, count))
+         {
+             // Matches come back in the same order as the submitted indices,
+             // so one forward cursor maps each match to its image in O(count)
+             // overall instead of rescanning the batch for every match.
+             size_t cursor = 0;
+             for (size_t m = 0; m < matchCount; ++m)
+             {
+                 size_t b = cursor;
+                 while (b < count && indices[start + b] != matchIndices[m])
+                     ++b;
+                 if (b == count)
+                     continue; // unknown index reported; nothing to map it to
+                 cursor = b + 1;
+
+                 TImageDataPtr pSecond = ptrs[start + b];
+                 double difference = sqrt(matchDiffs[m]/m_maxDifference)*100;
+                 if(pOriginal->crc32c != pSecond->crc32c)
+                     difference += ADDITIONAL_DIFFERENCE_FOR_DIFFERENT_CRC32;
+                 m_pResult->AddDuplImagePair(pOriginal, pSecond, difference, transform);
+             }
+         }
+         else
+         {
+             AD_DEBUG("CompareWithSetGPU: GpuCompareOneVsList FAILED\n");
+
+             // CPU fallback for this batch, same call pattern as CompareWithSet.
+             for (size_t b = 0; b < count; ++b)
+             {
+                 TImageDataPtr pSecond = ptrs[start + b];
+                 double difference;
+                 if(IsDuplPair(pTransformed, pSecond, &difference))
+                     m_pResult->AddDuplImagePair(pOriginal, pSecond, difference, transform);
+             }
+         }
+     }
+
+     AD_DEBUG("CompareWithSetGPU: Finished\n");
+ }
+
void TImageComparer::AddToSet(Set &set, TImageDataPtr pImageData)
{
if(pImageData->valid)
@@ -173,17 +264,18 @@ namespace ad
if(fastDifference > m_fastThreshold)
return false;
- uint64_t mainDifference = 0;
+ uint64_t mainDifference = 0;
if(m_pOptions->advanced.ignoreFrameWidth > 0)
{
SimdSquaredDifferenceSumMasked(pFirst->data->main, m_mainSize, pSecond->data->main, m_mainSize,
- m_pMask, m_mainSize, FRAME_MASK_INDEX, m_mainSize, 1, &mainDifference);
+ m_pMask, m_mainSize, FRAME_MASK_INDEX, m_mainSize, 1, &mainDifference);
}
else
{
- SimdSquaredDifferenceSum(pFirst->data->main, m_mainSize, pSecond->data->main, m_mainSize,
- m_mainSize, 1, &mainDifference);
+ SimdSquaredDifferenceSum(pFirst->data->main, m_mainSize, pSecond->data->main, m_mainSize,
+ m_mainSize, 1, &mainDifference);
}
+
if(mainDifference > m_mainThreshold)
return false;
diff --git a/src/AntiDupl/adImageComparer.h b/src/AntiDupl/adImageComparer.h
index 572b0cef..e8382a89 100644
--- a/src/AntiDupl/adImageComparer.h
+++ b/src/AntiDupl/adImageComparer.h
@@ -50,6 +50,7 @@ namespace ad
typedef std::vector Sets;
Sets m_sets;
+ TEngine *m_pEngine;
TOptions *m_pOptions;
public:
TImageComparer(TEngine *pEngine);
@@ -64,6 +65,7 @@ namespace ad
void AddToSet(Set &set, TImageDataPtr pImageData);
void CompareWithSet(const Set &set, TImageDataPtr pOriginal, TImageDataPtr pTransformed, adTransformType transform);
+ void CompareWithSetGPU(const TImageDataPtrList &list, TImageDataPtr pOriginal, TImageDataPtr pTransformed, adTransformType transform);
private:
TResultStorage *m_pResult;
diff --git a/src/AntiDupl/adImageData.cpp b/src/AntiDupl/adImageData.cpp
index 1cc19491..81028a8e 100644
--- a/src/AntiDupl/adImageData.cpp
+++ b/src/AntiDupl/adImageData.cpp
@@ -51,6 +51,8 @@ namespace ad
ratio = 0;
valid = false;
index = AD_IS_NOT_EXIST;
+ globalIdx = 0;
+ pEngine = NULL;
defect = AD_DEFECT_UNDEFINE;
crc32c = 0;
data = NULL;
@@ -90,6 +92,8 @@ namespace ad
defect = imageData.defect;
crc32c = imageData.crc32c;
index = imageData.index;
+ globalIdx = imageData.globalIdx;
+ pEngine = imageData.pEngine;
if(m_owner && imageData.data->side != data->side)
{
delete data;
diff --git a/src/AntiDupl/adImageData.h b/src/AntiDupl/adImageData.h
index 425c349a..ebb1e603 100644
--- a/src/AntiDupl/adImageData.h
+++ b/src/AntiDupl/adImageData.h
@@ -31,12 +31,15 @@
namespace ad
{
struct TOptions;
+ class TEngine;
//-------------------------------------------------------------------------
struct TImageData : public TImageInfo
{
TInt32 ratio; // Ratio between height and width of image;
bool valid; // The Image lie in 'valid' directory;
size_t index; // Index of the path from path list where this image were found;
+ size_t globalIdx; // Unique index for GPU buffer
+ TEngine* pEngine; // Pointer to engine for GPU access
TDefectType defect;
TUInt32 crc32c;
TPixelDataPtr data;
diff --git a/src/AntiDupl/adImageDataStorage.cpp b/src/AntiDupl/adImageDataStorage.cpp
index ae8a771b..b8eea24e 100644
--- a/src/AntiDupl/adImageDataStorage.cpp
+++ b/src/AntiDupl/adImageDataStorage.cpp
@@ -28,7 +28,9 @@
#include "adImageDataStorage.h"
#include "adIO.h"
#include "adFileStream.h"
+#include "adLogger.h"
#include "adException.h"
+#include "adGPUManager.h"
namespace ad
{
@@ -43,9 +45,11 @@ namespace ad
//-------------------------------------------------------------------------
TImageDataStorage::TImageDataStorage(TEngine *pEngine)
- :m_pStatus(pEngine->Status()),
+ :m_pEngine(pEngine), // kept so that Insert() can stamp the engine pointer on every image
+ m_pStatus(pEngine->Status()),
m_pOptions(pEngine->Options()),
- m_needToSave (false)
+ m_needToSave (false),
+ m_nextGlobalIdx(0) // next value Insert() assigns to TImageData::globalIdx
{
}
@@ -63,6 +67,16 @@ namespace ad
TImageDataStorage::TStorage::iterator TImageDataStorage::Insert(TImageData* pImageData)
{
+ // Counter exhausted: renumber the stored images 0..N-1 so that numbering can continue from N
+ if (m_nextGlobalIdx >= SIZE_MAX) {
+#ifdef AD_LOGGER_ENABLE
+ AD_LOG("GPU: globalIdx counter overflow, resetting indices...");
+#endif
+ ResetGpuIndices(); // NOTE(review): confirm that anything GPU-side keyed by the old indices is refreshed
+ }
+ // Stamp the image with the next sequential index and the engine pointer before it is stored
+ pImageData->globalIdx = m_nextGlobalIdx++;
+ pImageData->pEngine = m_pEngine;
return m_storage.insert(TStorage::value_type(pImageData->hash, pImageData));
}
@@ -71,12 +85,27 @@ namespace ad
for(TStorage::iterator it = m_storage.begin(); it != m_storage.end(); ++it)
delete it->second;
m_storage.clear();
+ m_nextGlobalIdx = 0;
+ if (m_pEngine->GpuManager() && m_pEngine->GpuManager()->IsAvailable())
+ {
+ m_pEngine->GpuManager()->ClearBuffer();
+ }
+ }
+
+ // Renumbers globalIdx of every stored image consecutively from zero, in m_storage
+ // iteration order; m_nextGlobalIdx is left equal to the number of stored images.
+ void TImageDataStorage::ResetGpuIndices()
+ {
+     m_nextGlobalIdx = 0;
+     for(TStorage::iterator entry = m_storage.begin(), last = m_storage.end(); entry != last; ++entry)
+         entry->second->globalIdx = m_nextGlobalIdx++;
+ }
void TImageDataStorage::Check()
{
m_pStatus->Reset();
size_t size = m_storage.size(), i = 0;
+ bool found_deleted = false;
for(TStorage::iterator it = m_storage.begin(); it != m_storage.end(); )
{
if(m_pStatus->Stopped())
@@ -86,6 +115,7 @@ namespace ad
{
delete it->second;
it = m_storage.erase(it);
+ found_deleted = true;
}
else
++it;
@@ -93,6 +123,16 @@ namespace ad
m_pStatus->SetProgress(i++, size);
}
m_pStatus->Reset();
+
+ // Re-index GPU indices after deletions
+ if (found_deleted) {
+ ResetGpuIndices();
+#ifdef AD_LOGGER_ENABLE
+ std::stringstream ss;
+ ss << "GPU: Re-indexed " << m_storage.size() << " images after cleanup.";
+ AD_LOG(ss.str().c_str());
+#endif
+ }
}
// Загружает в хранилише m_storage переданный файл
diff --git a/src/AntiDupl/adImageDataStorage.h b/src/AntiDupl/adImageDataStorage.h
index c161a5f8..78187e2c 100644
--- a/src/AntiDupl/adImageDataStorage.h
+++ b/src/AntiDupl/adImageDataStorage.h
@@ -33,6 +33,7 @@ namespace ad
// Хранение информации об изображениях в т.ч. эскизов
class TImageDataStorage
{
+ friend class TEngine;
public:
TImageDataStorage(TEngine *pEngine);
~TImageDataStorage() {ClearMemory();}
@@ -47,9 +48,12 @@ namespace ad
void Check();
void ClearMemory();
void SetSaveState(const bool needToSave);
+ void ResetGpuIndices();
- private:
typedef std::multimap TStorage;
+ const TStorage& Storage() const { return m_storage; }
+
+ private:
typedef std::vector TVector;
TStorage::iterator Find(const TImageInfo& imageInfo);
@@ -57,10 +61,12 @@ namespace ad
// Информация которую будем записывать. Словарь TImageData
TStorage m_storage;
+ TEngine *m_pEngine;
TStatus *m_pStatus;
TOptions *m_pOptions;
bool m_needToSave;
+ size_t m_nextGlobalIdx;
struct TData
{
diff --git a/src/AntiDupl/adImageUtils.cpp b/src/AntiDupl/adImageUtils.cpp
index 1a3a6776..f1566401 100644
--- a/src/AntiDupl/adImageUtils.cpp
+++ b/src/AntiDupl/adImageUtils.cpp
@@ -64,18 +64,18 @@ namespace ad
int img_stride = (pImage->View()->width) * TView::PixelSize(TView::Bgra32);;
TView Bgra(pImage->View()->width, pImage->View()->height, img_stride, TView::Bgra32, NULL);
Simd::RgbToBgra(*pImage->View(), Bgra);
- Simd::ResizeBilinear(Bgra, view);
+ Simd::Resize(Bgra, view);
}
else if (pImage->View()->format == TView::Format::Rgba32)
{
int img_stride = (pImage->View()->width) * TView::PixelSize(TView::Bgra32);;
TView Bgra(pImage->View()->width, pImage->View()->height, TView::Bgra32, NULL);
Simd::RgbaToBgra(*pImage->View(), Bgra);
- Simd::ResizeBilinear(Bgra, view);
+ Simd::Resize(Bgra, view);
}
else
{
- Simd::ResizeBilinear(*pImage->View(), view);
+ Simd::Resize(*pImage->View(), view);
}
result = AD_OK;
}
diff --git a/src/AntiDupl/adJxl.cpp b/src/AntiDupl/adJxl.cpp
index 1da8b868..e1a6a121 100644
--- a/src/AntiDupl/adJxl.cpp
+++ b/src/AntiDupl/adJxl.cpp
@@ -72,17 +72,16 @@ namespace ad
JXL_DEC_FULL_IMAGE))
{
#ifdef AD_LOGGER_ENABLE
- AD_LOG("JxlDecoderSubscribeEvents failed\n);
+ AD_LOG("JxlDecoderSubscribeEvents failed\n");
#endif//AD_LOGGER_ENABLE
return NULL;
}
-
- if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(decoder.get(),
- JxlResizableParallelRunner,
- runner.get()))
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetParallelRunner(decoder.get(),
+ JxlResizableParallelRunner, runner.get()))
{
#ifdef AD_LOGGER_ENABLE
- AD_LOG("JxlDecoderSetParallelRunner failed\n);
+ AD_LOG("JxlDecoderSetParallelRunner failed\n");
#endif//AD_LOGGER_ENABLE
return NULL;
}
@@ -118,7 +117,7 @@ namespace ad
if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(decoder.get(), &info))
{
#ifdef AD_LOGGER_ENABLE
- AD_LOG("JxlDecoderGetBasicInfo failed\n);
+ AD_LOG("JxlDecoderGetBasicInfo failed\n");
#endif//AD_LOGGER_ENABLE
return NULL;
}
@@ -134,10 +133,10 @@ namespace ad
size_t icc_size;
if (JXL_DEC_SUCCESS !=
JxlDecoderGetICCProfileSize(
- decoder.get(), &format, JXL_COLOR_PROFILE_TARGET_DATA, &icc_size))
+ decoder.get(), JXL_COLOR_PROFILE_TARGET_DATA, &icc_size))
{
#ifdef AD_LOGGER_ENABLE
- AD_LOG("JxlDecoderGetICCProfileSize failed\n);
+ AD_LOG("JxlDecoderGetICCProfileSize failed\n");
#endif//AD_LOGGER_ENABLE
return NULL;
}
@@ -156,7 +155,7 @@ namespace ad
JxlDecoderImageOutBufferSize(decoder.get(), &format, &buffer_size))
{
#ifdef AD_LOGGER_ENABLE
- AD_LOG("JxlDecoderImageOutBufferSize failed\n);
+ AD_LOG("JxlDecoderImageOutBufferSize failed\n");
#endif//AD_LOGGER_ENABLE
return NULL;
}
diff --git a/src/AntiDupl/adOpenJpeg.cpp b/src/AntiDupl/adOpenJpeg.cpp
index 82157300..07269242 100644
--- a/src/AntiDupl/adOpenJpeg.cpp
+++ b/src/AntiDupl/adOpenJpeg.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#define OPJ_STATIC
+#include
#include "openjpeg.h"
#include "adPerformance.h"
@@ -317,19 +318,28 @@ namespace ad
{
AD_FUNCTION_PERFORMANCE_TEST
TView *pView = NULL;
- opj_codec_t * codec = opj_create_decompress(OpenJpegCodecFormat(data, size));
+
+ std::unique_ptr codec(
+ opj_create_decompress(OpenJpegCodecFormat(data, size)), opj_destroy_codec);
+
if(codec)
{
opj_dparameters_t parameters;
opj_set_default_decoder_parameters(¶meters);
- opj_setup_decoder(codec, ¶meters);
- opj_stream_t * stream = СreateBlobStream(data, size);
+ opj_setup_decoder(codec.get(), ¶meters);
+
+ std::unique_ptr stream(
+ СreateBlobStream(data, size), opj_stream_destroy);
+
if(stream)
{
- opj_image_t * image;
- if (opj_read_header(stream, codec, &image))
+ opj_image_t * imageRaw = NULL;
+ if (opj_read_header(stream.get(), codec.get(), &imageRaw))
{
- if(opj_decode(codec, stream, image))
+ std::unique_ptr image(
+ imageRaw, opj_image_destroy);
+
+ if(opj_decode(codec.get(), stream.get(), image.get()))
{
size_t width = image->x1 - image->x0;
size_t height = image->y1 - image->y0;
@@ -375,11 +385,8 @@ namespace ad
AD_PERFORMANCE_TEST_SET_SIZE(width*height)
}
}
- opj_image_destroy(image);
}
- opj_stream_destroy(stream);
}
- opj_destroy_codec(codec);
}
return pView;
}
diff --git a/src/AntiDupl/adThreadManagement.cpp b/src/AntiDupl/adThreadManagement.cpp
index 1a6e4c28..901386c4 100644
--- a/src/AntiDupl/adThreadManagement.cpp
+++ b/src/AntiDupl/adThreadManagement.cpp
@@ -32,6 +32,9 @@
#include "adResult.h"
#include "adResultStorage.h"
#include "adPerformance.h"
+#include
+
+#define AD_DEBUG(msg) OutputDebugStringA(msg)
namespace ad
{
@@ -171,7 +174,12 @@ namespace ad
void TCollectTask::DoOwn(TImageData *pImageData)
{
m_pDataCollector->Fill(pImageData);
- m_pCompareManager->Add(pImageData);
+ // Fill() has run; hand the image to the compare manager unless the engine defers comparison.
+ // SkipComparisonDuringCollection() is set in GPU AllVsAll mode, where the CPU comparison is skipped here.
+ if (!m_pEngine->SkipComparisonDuringCollection()) {
+ m_pCompareManager->Add(pImageData);
+ }
+
m_pStatus->Process(AD_THREAD_TYPE_COLLECT, Queue()->Id(), pImageData->path.Original().c_str());
}
//-------------------------------------------------------------------------
@@ -257,15 +265,36 @@ namespace ad
void TCompareManager::Add(TImageData *pImageData)
{
+ // Queues pImageData on every compare thread; does nothing while the thread list has not been created
+ if (m_pThreads == NULL) {
+ AD_DEBUG("TCompareManager::Add: Not started, skipping\n");
+ return;
+ }
+
+ AD_DEBUG("TCompareManager::Add: Starting\n");
+
if(CanCompare(pImageData))
{
+ AD_DEBUG("TCompareManager::Add: CanCompare is true\n");
+
TCriticalSection::TLocker locker(m_pCS);
size_t threadId = m_addCounter%m_pThreads->size();
+ // No debug tracing between here and the end of this scope: the m_pCS locker above is
+ // alive, so debugger output here would lengthen the time every other caller waits on it.
for(TThreads::iterator i = m_pThreads->begin(); i != m_pThreads->end(); i++)
i->task->Queue()->Push(pImageData, threadId);
+
+ // The image has now been pushed, with the same threadId, to the queue of every thread in m_pThreads.
+
m_pEngine->Status()->Assign(AD_THREAD_TYPE_COMPARE, threadId);
m_addCounter++;
}
+ else
+ {
+ AD_DEBUG("TCompareManager::Add: CanCompare is false\n");
+ }
+
+ AD_DEBUG("TCompareManager::Add: Finished\n");
}
size_t TCompareManager::DefaultThreadCount(size_t imageCount)
@@ -316,26 +345,58 @@ namespace ad
void TCollectManager::Add(TImageData *pImageData)
{
+ AD_DEBUG("TCollectManager::Add: Starting\n");
+ // Images needing defect checking or pixel data, or whose crc32c is still 0, are queued to a collect thread; the rest are handled inline below.
if(pImageData->DefectCheckingNeed(m_pOptions) || pImageData->PixelDataFillingNeed(m_pOptions) || pImageData->crc32c == 0)
{
+ AD_DEBUG("TCollectManager::Add: Loading file to memory\n");
pImageData->hGlobal = LoadFileToMemory(pImageData->path.Original().c_str());
+ AD_DEBUG("TCollectManager::Add: File loaded\n");
+ // NOTE(review): hGlobal is not checked here - confirm the collect task copes with a failed load.
size_t threadId = GetThreadId();
+ AD_DEBUG("TCollectManager::Add: Got threadId\n");
+
m_pThreads->at(threadId).task->Queue()->Push(pImageData, threadId);
+ AD_DEBUG("TCollectManager::Add: Pushed to queue\n");
+
m_pEngine->Status()->Assign(AD_THREAD_TYPE_COLLECT, threadId);
}
else
{
+ AD_DEBUG("TCollectManager::Add: Using cached data\n");
TDefectType defect = pImageData->GetDefect(m_pOptions);
if(defect > AD_DEFECT_NONE)
m_pEngine->Result()->AddDefectImage(pImageData, defect);
+
+ AD_DEBUG("TCollectManager::Add: Calling FillOther\n");
pImageData->FillOther(m_pOptions);
- m_pCompareManager->Add(pImageData);
+ // In GPU AllVsAll mode the engine asks to skip comparison during collection,
+ // so the image is only handed to the compare manager when that flag is off.
+ if (m_pEngine->SkipComparisonDuringCollection())
+ {
+ AD_DEBUG("TCollectManager::Add: Skipping comparison (GPU mode)\n");
+ }
+ else
+ {
+ AD_DEBUG("TCollectManager::Add: Calling CompareManager->Add\n");
+ m_pCompareManager->Add(pImageData);
+ AD_DEBUG("TCollectManager::Add: CompareManager->Add returned\n");
+ }
}
+
+ AD_DEBUG("TCollectManager::Add: Finished\n");
}
size_t TCollectManager::DefaultThreadCount()
{
size_t threadCountMax = GetProcessorCount();
+
+ // In GPU AllVsAll mode, maximize collection threads (leave 1 core for UI)
+ if (m_pEngine->SkipComparisonDuringCollection())
+ {
+ return Simd::Max((size_t)1, threadCountMax - 1);
+ }
+
#ifdef AD_TURBO_JPEG_ENABLE
return Simd::Max((size_t)1, threadCountMax / 2);
#else
diff --git a/src/AntiDupl/adTurboJpeg.cpp b/src/AntiDupl/adTurboJpeg.cpp
index 3f2299f8..1cb1f542 100644
--- a/src/AntiDupl/adTurboJpeg.cpp
+++ b/src/AntiDupl/adTurboJpeg.cpp
@@ -38,6 +38,7 @@ namespace ad
{
struct TurboJpeg
{
+ public:
TurboJpeg()
{
_handle = ::tjInitDecompress();
@@ -47,19 +48,39 @@ namespace ad
{
::tjDestroy(_handle);
}
+
+ tjhandle Handle() const { return _handle; }
- TView * Decompress(const unsigned char * data, size_t size)
+ TView * Decompress(const unsigned char * data, size_t size, int targetSize = 0)
{
int subsamp, colorspace, width, height, flags = 0;
if(::tjDecompressHeader3(_handle, data, (unsigned long)size, &width, &height, &subsamp, &colorspace) != 0)
return NULL;
if (width == 0 || height == 0)
return NULL;
- TView * pView = new TView(width, height, TView::Bgra32, NULL, 4);
- if (::tjDecompress2(_handle, data, size, pView->data, width, 0, height, ::TJPF_RGBA, flags) != 0 && ::tjGetErrorCode(_handle) != ::TJERR_WARNING)
+
+ // Scaled decode for speed (only when targetSize > 0)
+ int scaledWidth = width;
+ int scaledHeight = height;
+
+ if (targetSize > 0 && (width > targetSize || height > targetSize)) {
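+ // libjpeg-turbo supports only certain scaling factors; use the strongest of 1/2, 1/4, 1/8 that keeps both sides >= targetSize
+ // NOTE(review): TJSCALED rounds scaled dimensions up, while width / denom below rounds down - confirm for sizes not divisible by denom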
+ int denom = 1;
+ if (width / 8 >= targetSize && height / 8 >= targetSize) denom = 8;
+ else if (width / 4 >= targetSize && height / 4 >= targetSize) denom = 4;
+ else if (width / 2 >= targetSize && height / 2 >= targetSize) denom = 2;
+
+ if (denom > 1) {
+ scaledWidth = width / denom;
+ scaledHeight = height / denom;
+ flags |= TJFLAG_FASTUPSAMPLE;
+ }
+ }
+
+ TView * pView = new TView(scaledWidth, scaledHeight, TView::Bgra32, NULL, 4);
+ if (::tjDecompress2(_handle, data, size, pView->data, scaledWidth, 0, scaledHeight, ::TJPF_RGBA, flags) != 0 && ::tjGetErrorCode(_handle) != ::TJERR_WARNING)
{
- //int code = ::tjGetErrorCode(_handle);
- //const char * str = ::tjGetErrorStr2(_handle);
delete pView;
pView = NULL;
}
@@ -72,19 +93,26 @@ namespace ad
thread_local TurboJpeg turboJpeg;
- TTurboJpeg * TTurboJpeg::Load(HGLOBAL hGlobal)
+ TTurboJpeg * TTurboJpeg::Load(HGLOBAL hGlobal, int targetSize)
{
if (hGlobal)
{
const unsigned char * data = (unsigned char*)::GlobalLock(hGlobal);
size_t size = ::GlobalSize(hGlobal);
TTurboJpeg * pTurboJpeg = NULL;
- TView * pView = turboJpeg.Decompress(data, size);
+ TView * pView = turboJpeg.Decompress(data, size, targetSize);
if (pView)
{
pTurboJpeg = new TTurboJpeg();
pTurboJpeg->m_format = TImage::Jpeg;
pTurboJpeg->m_pView = pView;
+
+ // Store the original dimensions taken from the JPEG header
+ int subsamp, colorspace, origWidth, origHeight;
+ if(::tjDecompressHeader3(turboJpeg.Handle(), data, (unsigned long)size, &origWidth, &origHeight, &subsamp, &colorspace) == 0) {
+ pTurboJpeg->m_origWidth = origWidth;
+ pTurboJpeg->m_origHeight = origHeight;
+ }
}
::GlobalUnlock(hGlobal);
return pTurboJpeg;
diff --git a/src/AntiDupl/adTurboJpeg.h b/src/AntiDupl/adTurboJpeg.h
index 6d5ff8c4..b8437c95 100644
--- a/src/AntiDupl/adTurboJpeg.h
+++ b/src/AntiDupl/adTurboJpeg.h
@@ -32,8 +32,15 @@ namespace ad
class TTurboJpeg : public TImage
{
public:
- static TTurboJpeg * Load(HGLOBAL hGlobal);
+ static TTurboJpeg * Load(HGLOBAL hGlobal, int targetSize = 0); // targetSize is forwarded to the decoder; 0 (the default) decodes at full size
static bool Supported(HGLOBAL hGlobal);
+ // Dimensions read from the JPEG header by Load(); both stay 0 if that header read fails.
+ int OriginalWidth() const { return m_origWidth; }
+ int OriginalHeight() const { return m_origHeight; }
+
+ private:
+ int m_origWidth = 0; // header width, independent of any scaled decode
+ int m_origHeight = 0; // header height, independent of any scaled decode
};
}
#endif//AD_TURBO_JPEG_ENABLE