From 6f560da1f8de21b9b35defb5ae82778d22a12da8 Mon Sep 17 00:00:00 2001 From: aby913 Date: Sat, 28 Feb 2026 20:26:03 +0800 Subject: [PATCH 01/45] refactor: syncer pipeline --- internal/v2/appinfo/appinfomodule.go | 28 +- internal/v2/appinfo/cache.go | 13 +- internal/v2/appinfo/datawatcher_app.go | 122 +++++++- internal/v2/appinfo/datawatcher_repo.go | 24 ++ internal/v2/appinfo/datawatcher_state.go | 75 +++-- internal/v2/appinfo/hydration.go | 281 +++++++++++++++++- .../v2/appinfo/status_correction_check.go | 29 ++ 7 files changed, 523 insertions(+), 49 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index e3ba9e2..8c5f459 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -584,10 +584,16 @@ func (m *AppInfoModule) initDataWatcher() error { m.dataWatcher = NewDataWatcher(m.cacheManager, m.hydrator, m.dataSender) // Start DataWatcher - if err := m.dataWatcher.Start(m.ctx); err != nil { + // if err := m.dataWatcher.Start(m.ctx); err != nil { + // return fmt.Errorf("failed to start DataWatcher: %w", err) + // } + if err := m.dataWatcher.StartWithOptions(m.ctx, false); err != nil { return fmt.Errorf("failed to start DataWatcher: %w", err) } + // Wire DataWatcher into Hydrator's serial pipeline + m.hydrator.SetDataWatcher(m.dataWatcher) + glog.V(2).Info("DataWatcher initialized successfully") return nil } @@ -648,10 +654,18 @@ func (m *AppInfoModule) initDataWatcherRepo() error { m.dataWatcherRepo = NewDataWatcherRepo(m.redisClient, m.cacheManager, m.dataWatcher, m.dataSender) // Start DataWatcherRepo - if err := m.dataWatcherRepo.Start(); err != nil { + // if err := m.dataWatcherRepo.Start(); err != nil { + // return fmt.Errorf("failed to start DataWatcherRepo: %w", err) + // } + + if err := m.dataWatcherRepo.StartWithOptions(false); err != nil { return fmt.Errorf("failed to start DataWatcherRepo: %w", err) } + if m.hydrator != nil { + 
m.hydrator.SetDataWatcherRepo(m.dataWatcherRepo) + } + glog.V(2).Info("DataWatcherRepo initialized successfully") return nil } @@ -667,10 +681,18 @@ func (m *AppInfoModule) initStatusCorrectionChecker() error { m.statusCorrectionChecker = NewStatusCorrectionChecker(m.cacheManager) // Start StatusCorrectionChecker - if err := m.statusCorrectionChecker.Start(); err != nil { + // if err := m.statusCorrectionChecker.Start(); err != nil { + // return fmt.Errorf("failed to start StatusCorrectionChecker: %w", err) + // } + + if err := m.statusCorrectionChecker.StartWithOptions(false); err != nil { return fmt.Errorf("failed to start StatusCorrectionChecker: %w", err) } + if m.hydrator != nil { + m.hydrator.SetStatusCorrectionChecker(m.statusCorrectionChecker) + } + glog.V(2).Info("StatusCorrectionChecker initialized successfully") return nil } diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index daa0b30..b5e38e8 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -120,7 +120,7 @@ func (cm *CacheManager) GetUserDataNoLock(userID string) *UserData { func (cm *CacheManager) GetUserDataWithFallback(userID string) *UserData { if !cm.mutex.TryRLock() { // Lock not available immediately, return nil to avoid blocking - glog.Warningf("[TryRLock] GetUserData: Read lock not available for user %s, returning nil", userID) + glog.Warningf("[TryRLock] GetUserDataWithFallback: Read lock not available for user %s, returning nil", userID) return nil } defer cm.mutex.RUnlock() @@ -203,7 +203,7 @@ func NewCacheManager(redisClient *RedisClient, userConfig *UserConfig) *CacheMan // Start initializes the cache by loading data from Redis and starts the sync worker func (cm *CacheManager) Start() error { - glog.V(3).Infof("Starting cache manager") + glog.V(2).Infof("Starting cache manager") // Load cache data from Redis if ClearCache is false if !cm.userConfig.ClearCache { @@ -278,11 +278,11 @@ func (cm *CacheManager) Start() error { _wd() // 
Start sync worker goroutine - go cm.syncWorker() + go cm.syncWorker() // + 临时注释 // Start periodic cleanup of AppRenderFailed data (every 5 minutes) cm.cleanupTicker = time.NewTicker(5 * time.Minute) - go cm.cleanupWorker() + go cm.cleanupWorker() // + glog.V(3).Infof("Cache manager started successfully") return nil @@ -963,7 +963,7 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App } glog.V(3).Infof("Successfully processed %d apps from market data for user=%s, source=%s", len(sourceData.AppInfoLatestPending), userID, sourceID) - } else { + } else { // + 走这里 // This might be market data with nested apps structure, try to extract apps glog.V(3).Infof("DEBUG: CALL POINT 2 - Processing potential market data for user=%s, source=%s", userID, sourceID) glog.V(3).Infof("DEBUG: CALL POINT 2 - Data before processing: %+v", data) @@ -1011,7 +1011,8 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App // Notify hydrator about pending data update for immediate task creation if cm.hydrationNotifier != nil && len(sourceData.AppInfoLatestPending) > 0 { glog.V(3).Infof("Notifying hydrator about pending data update for user=%s, source=%s", userID, sourceID) - go cm.hydrationNotifier.NotifyPendingDataUpdate(userID, sourceID, data) + glog.V(2).Info("Serial pipeline, syncer done, continue...") + go cm.hydrationNotifier.NotifyPendingDataUpdate(userID, sourceID, data) // +++++ } case types.AppRenderFailed: // Handle render failed data - this is typically set by the hydrator when tasks fail diff --git a/internal/v2/appinfo/datawatcher_app.go b/internal/v2/appinfo/datawatcher_app.go index 365b345..ce828df 100644 --- a/internal/v2/appinfo/datawatcher_app.go +++ b/internal/v2/appinfo/datawatcher_app.go @@ -51,6 +51,32 @@ func NewDataWatcher(cacheManager *CacheManager, hydrator *Hydrator, dataSender * // Start begins the data watching process func (dw *DataWatcher) Start(ctx context.Context) error { + // if 
atomic.LoadInt32(&dw.isRunning) == 1 { + // return fmt.Errorf("DataWatcher is already running") + // } + + // if dw.cacheManager == nil { + // return fmt.Errorf("CacheManager is required for DataWatcher") + // } + + // if dw.hydrator == nil { + // return fmt.Errorf("Hydrator is required for DataWatcher") + // } + + // atomic.StoreInt32(&dw.isRunning, 1) + // glog.Infof("Starting DataWatcher with interval: %v", time.Duration(atomic.LoadInt64((*int64)(&dw.interval)))) + + // // Start the monitoring goroutine + // go dw.watchLoop(ctx) + + // return nil + + return dw.StartWithOptions(ctx, true) +} + +// StartWithOptions begins the data watching process with options +// If enableWatchLoop is false, the periodic watchLoop is not started (used when serial pipeline handles processing) +func (dw *DataWatcher) StartWithOptions(ctx context.Context, enableWatchLoop bool) error { if atomic.LoadInt32(&dw.isRunning) == 1 { return fmt.Errorf("DataWatcher is already running") } @@ -64,10 +90,13 @@ func (dw *DataWatcher) Start(ctx context.Context) error { } atomic.StoreInt32(&dw.isRunning, 1) - glog.Infof("Starting DataWatcher with interval: %v", time.Duration(atomic.LoadInt64((*int64)(&dw.interval)))) - // Start the monitoring goroutine - go dw.watchLoop(ctx) + if enableWatchLoop { + glog.Infof("Starting DataWatcher with interval: %v", time.Duration(atomic.LoadInt64((*int64)(&dw.interval)))) + go dw.watchLoop(ctx) + } else { + glog.Infof("Starting DataWatcher in passive mode (serial pipeline handles processing)") + } return nil } @@ -351,6 +380,7 @@ func (dw *DataWatcher) calculateAndSetUserHashWithRetry(userID string, userData // calculateAndSetUserHashDirect calculates hash without tracking (used internally by goroutines) func (dw *DataWatcher) calculateAndSetUserHashDirect(userID string, userData *types.UserData) bool { + glog.V(2).Infof("Serial pipeline: DataWatcherApp, user: %s", userID) glog.V(3).Infof("DataWatcher: Starting direct hash calculation for user %s", userID) // 
Get the original user data from cache manager to ensure we have the latest reference @@ -1244,3 +1274,89 @@ func (dw *DataWatcher) sendNewAppReadyNotification(userID string, completedApp * glog.V(2).Infof("DataWatcher: Successfully sent new app ready notification for app %s (version: %s, source: %s)", appName, appVersion, sourceID) } } + +// ProcessSingleAppToLatest moves a single completed pending app to AppInfoLatest +// Returns true if the app was successfully moved +func (dw *DataWatcher) ProcessSingleAppToLatest(userID, sourceID string, pendingApp *types.AppInfoLatestPendingData) bool { + if pendingApp == nil { + return false + } + + // Check hydration completion + if dw.hydrator != nil && !dw.hydrator.isAppHydrationComplete(pendingApp) { + return false + } + + // Convert to latest data + latestData := dw.convertPendingToLatest(pendingApp) + if latestData == nil { + return false + } + + glog.V(2).Infof("Serial pipeline, datawatcher_app user: %s, source: %s, id: %s, name: %s", userID, sourceID, pendingApp.AppInfo.AppEntry.ID, pendingApp.AppInfo.AppEntry.Name) + + // Acquire write lock to move data + if !dw.cacheManager.mutex.TryLock() { + glog.Warningf("[TryLock] ProcessSingleAppToLatest: Write lock not available for user=%s, source=%s, skipping", userID, sourceID) + return false + } + defer dw.cacheManager.mutex.Unlock() + + userData, userExists := dw.cacheManager.cache.Users[userID] + if !userExists { + return false + } + sourceData, sourceExists := userData.Sources[sourceID] + if !sourceExists { + return false + } + + appName := dw.getAppName(pendingApp) + appID := dw.getAppID(pendingApp) + + // Check if app with same name already exists in AppInfoLatest + existingIndex := -1 + for i, existingApp := range sourceData.AppInfoLatest { + if existingApp != nil { + existingAppName := dw.getAppNameFromLatest(existingApp) + if existingAppName == appName { + existingIndex = i + break + } + } + } + + if existingIndex >= 0 { + if latestData.AppInfo != nil && 
latestData.AppInfo.AppEntry != nil && + sourceData.AppInfoLatest[existingIndex].AppInfo != nil && + sourceData.AppInfoLatest[existingIndex].AppInfo.AppEntry != nil && + latestData.AppInfo.AppEntry.Version != sourceData.AppInfoLatest[existingIndex].AppInfo.AppEntry.Version { + dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) + } + sourceData.AppInfoLatest[existingIndex] = latestData + glog.V(2).Infof("ProcessSingleAppToLatest: replaced existing app %s (user=%s, source=%s)", appName, userID, sourceID) + } else { + sourceData.AppInfoLatest = append(sourceData.AppInfoLatest, latestData) + glog.V(2).Infof("ProcessSingleAppToLatest: added new app %s (user=%s, source=%s)", appName, userID, sourceID) + dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) + } + + // Remove from pending list + newPendingList := make([]*types.AppInfoLatestPendingData, 0, len(sourceData.AppInfoLatestPending)) + for _, p := range sourceData.AppInfoLatestPending { + pID := dw.getAppID(p) + if pID != appID { + newPendingList = append(newPendingList, p) + } + } + sourceData.AppInfoLatestPending = newPendingList + + atomic.AddInt64(&dw.totalAppsMoved, 1) + glog.Infof("ProcessSingleAppToLatest: successfully moved app %s to Latest (user=%s, source=%s)", appName, userID, sourceID) + return true +} + +// CalculateAndSetUserHashDirect is a public wrapper for calculateAndSetUserHashDirect +func (dw *DataWatcher) CalculateAndSetUserHashDirect(userID string, userData *types.UserData) bool { + return dw.calculateAndSetUserHashDirect(userID, userData) +} diff --git a/internal/v2/appinfo/datawatcher_repo.go b/internal/v2/appinfo/datawatcher_repo.go index dff7ba3..4e2235f 100644 --- a/internal/v2/appinfo/datawatcher_repo.go +++ b/internal/v2/appinfo/datawatcher_repo.go @@ -143,6 +143,30 @@ func (dwr *DataWatcherRepo) Start() error { return nil } +// StartWithOptions starts with options, if enablePolling is false, the periodic polling is not started +func (dwr *DataWatcherRepo) 
StartWithOptions(enablePolling bool) error { + dwr.mu.Lock() + defer dwr.mu.Unlock() + + if dwr.isRunning { + return fmt.Errorf("DataWatcherRepo is already running") + } + + dwr.isRunning = true + glog.V(3).Info("Starting DataWatcherRepo in passive mode (serial pipeline handles processing)") + + return nil +} + +// ProcessOnce executes one round of state change processing, called by serial pipeline +func (dwr *DataWatcherRepo) ProcessOnce() { + if !dwr.isRunning { + return + } + + dwr.processStateChanges() +} + // Stop stops the periodic state checking process func (dwr *DataWatcherRepo) Stop() error { dwr.mu.Lock() diff --git a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index 1432e11..bf17127 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -158,21 +158,33 @@ func (dw *DataWatcherState) resolveInvisibleFlag(raw *bool, entranceName, appNam func (dw *DataWatcherState) fetchInvisibleFromAppService(appName, userID, entranceName string) (bool, error) { // Check cache first (using TryRLock to avoid blocking) cacheKey := fmt.Sprintf("%s:%s", userID, appName) - if dw.appServiceCacheMutex.TryRLock() { - if appCache, exists := dw.appServiceCache[cacheKey]; exists { - if invisible, found := appCache[entranceName]; found { - dw.appServiceCacheMutex.RUnlock() - glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - using cached invisible=%t for entrance %s (app=%s, user=%s)", - invisible, entranceName, appName, userID) - return invisible, nil - } + dw.appServiceCacheMutex.RLock() + defer dw.appServiceCacheMutex.RUnlock() + + if appCache, exists := dw.appServiceCache[cacheKey]; exists { + if invisible, found := appCache[entranceName]; found { + // dw.appServiceCacheMutex.RUnlock() + glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - using cached invisible=%t for entrance %s (app=%s, user=%s)", + invisible, entranceName, appName, userID) + return invisible, nil } - 
dw.appServiceCacheMutex.RUnlock() - } else { - glog.Warningf("[TryRLock] DEBUG: fetchInvisibleFromAppService - read lock not available, skipping cache check for entrance %s (app=%s, user=%s)", - entranceName, appName, userID) } + // if dw.appServiceCacheMutex.TryRLock() { + // if appCache, exists := dw.appServiceCache[cacheKey]; exists { + // if invisible, found := appCache[entranceName]; found { + // dw.appServiceCacheMutex.RUnlock() + // glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - using cached invisible=%t for entrance %s (app=%s, user=%s)", + // invisible, entranceName, appName, userID) + // return invisible, nil + // } + // } + // dw.appServiceCacheMutex.RUnlock() + // } else { + // glog.Warningf("[TryRLock] DEBUG: fetchInvisibleFromAppService - read lock not available, skipping cache check for entrance %s (app=%s, user=%s)", + // entranceName, appName, userID) + // } + // Fetch from API host := getEnvOrDefault("APP_SERVICE_SERVICE_HOST", "localhost") port := getEnvOrDefault("APP_SERVICE_SERVICE_PORT", "80") @@ -221,20 +233,33 @@ func (dw *DataWatcherState) fetchInvisibleFromAppService(appName, userID, entran } // Cache all entrances for this app to avoid future API calls (using TryLock to avoid blocking) - if dw.appServiceCacheMutex.TryLock() { - if dw.appServiceCache[cacheKey] == nil { - dw.appServiceCache[cacheKey] = make(map[string]bool) - } - for _, specEntrance := range app.Spec.Entrances { - dw.appServiceCache[cacheKey][specEntrance.Name] = specEntrance.Invisible - } - dw.appServiceCacheMutex.Unlock() - glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - fetched and cached invisible=%t for entrance %s (app=%s, user=%s)", - invisibleValue, entranceName, appName, userID) - } else { - glog.Warningf("[TryLock] DEBUG: fetchInvisibleFromAppService - write lock not available, skipping cache update for entrance %s (app=%s, user=%s)", - entranceName, appName, userID) + dw.appServiceCacheMutex.Lock() + defer dw.appServiceCacheMutex.Unlock() + + if 
dw.appServiceCache[cacheKey] == nil { + dw.appServiceCache[cacheKey] = make(map[string]bool) + } + for _, specEntrance := range app.Spec.Entrances { + dw.appServiceCache[cacheKey][specEntrance.Name] = specEntrance.Invisible } + // dw.appServiceCacheMutex.Unlock() + glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - fetched and cached invisible=%t for entrance %s (app=%s, user=%s)", + invisibleValue, entranceName, appName, userID) + + // if dw.appServiceCacheMutex.TryLock() { + // if dw.appServiceCache[cacheKey] == nil { + // dw.appServiceCache[cacheKey] = make(map[string]bool) + // } + // for _, specEntrance := range app.Spec.Entrances { + // dw.appServiceCache[cacheKey][specEntrance.Name] = specEntrance.Invisible + // } + // dw.appServiceCacheMutex.Unlock() + // glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - fetched and cached invisible=%t for entrance %s (app=%s, user=%s)", + // invisibleValue, entranceName, appName, userID) + // } else { + // glog.Warningf("[TryLock] DEBUG: fetchInvisibleFromAppService - write lock not available, skipping cache update for entrance %s (app=%s, user=%s)", + // entranceName, appName, userID) + // } return invisibleValue, nil } diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index ea6b0a3..1306121 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -35,6 +35,13 @@ type Hydrator struct { failedTasks map[string]*hydrationfn.HydrationTask taskMutex sync.RWMutex + // Serial pipeline (replaces parallel workers + pendingDataMonitor + DataWatcher.watchLoop) + dataWatcher *DataWatcher + dataWatcherRepo *DataWatcherRepo + statusCorrectionChecker *StatusCorrectionChecker + pipelineTrigger chan struct{} + pipelineMutex sync.Mutex + // Cache access mutex for unified lock strategy - removed, use CacheManager.mutex instead // Batch completion tracking @@ -112,6 +119,7 @@ func NewHydrator(cache *types.CacheData, settingsManager *settings.SettingsManag settingsManager: 
settingsManager, cacheManager: cacheManager, taskQueue: make(chan *hydrationfn.HydrationTask, config.QueueSize), + pipelineTrigger: make(chan struct{}, 1), workerCount: config.WorkerCount, stopChan: make(chan struct{}), isRunning: atomic.Bool{}, // Initialize atomic.Bool @@ -172,12 +180,15 @@ func (h *Hydrator) Start(ctx context.Context) error { glog.V(3).Infof("Starting hydrator with %d workers and %d steps", h.workerCount, len(h.steps)) // Start worker goroutines - for i := 0; i < h.workerCount; i++ { - go h.worker(ctx, i) - } + // for i := 0; i < h.workerCount; i++ { + // go h.worker(ctx, i) + // } + + // // Start pending data monitor + // go h.pendingDataMonitor(ctx) - // Start pending data monitor - go h.pendingDataMonitor(ctx) + // Start serial pipeline loop (replaces worker pool + pendingDataMonitor + DataWatcher watchLoop) + go h.serialPipelineLoop(ctx) // Start batch completion processor go h.batchCompletionProcessor(ctx) @@ -1373,11 +1384,11 @@ func CreateDefaultHydrator(cache *types.CacheData, settingsManager *settings.Set // Processes pending data update notification and creates hydration tasks immediately func (h *Hydrator) NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) { if !h.IsRunning() { - glog.V(3).Infof("Hydrator is not running, ignoring pending data notification for user: %s, source: %s", userID, sourceID) + glog.V(2).Infof("Hydrator is not running, ignoring pending data notification for user: %s, source: %s", userID, sourceID) return } - glog.V(3).Infof("Received pending data update notification for user: %s, source: %s", userID, sourceID) + glog.V(2).Infof("Received pending data update notification for user: %s, source: %s", userID, sourceID) // Create tasks from the pending data immediately h.createTasksFromPendingDataMap(userID, sourceID, pendingData) @@ -1440,7 +1451,7 @@ func (h *Hydrator) createTasksFromPendingDataMap(userID, sourceID string, pendin if appMap, ok := appData.(map[string]interface{}); 
ok { // Check if app data contains necessary raw data fields before creating task if !h.hasRequiredRawDataFields(appMap) { - glog.V(3).Infof("App %s (user: %s, source: %s) missing required raw data fields, skipping task creation", + glog.Warningf("Serial pipeline, App %s (user: %s, source: %s) missing required raw data fields, skipping task creation", appID, userID, sourceID) continue } @@ -1449,7 +1460,7 @@ func (h *Hydrator) createTasksFromPendingDataMap(userID, sourceID string, pendin if !h.hasActiveTaskForApp(userID, sourceID, appID, appName) { // Check if app is already in render failed list if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - glog.V(3).Infof("App %s (user: %s, source: %s) is already in render failed list, skipping task creation", + glog.Warningf("Serial pipeline, App %s (user: %s, source: %s) is already in render failed list, skipping task creation", appID, userID, sourceID) continue } @@ -1463,13 +1474,13 @@ func (h *Hydrator) createTasksFromPendingDataMap(userID, sourceID string, pendin } } if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { - // glog.Infof("App hydration already complete for app: %s (user: %s, source: %s), skipping task creation", - // appID, userID, sourceID) + glog.Infof("App hydration already complete for app: %s (user: %s, source: %s), skipping task creation", + appID, userID, sourceID) continue } if len(appMap) == 0 { - glog.V(3).Infof("Warning: Empty app data for app: %s (user: %s, source: %s), skipping task creation", + glog.Warningf("Warning: Empty app data for app: %s (user: %s, source: %s), skipping task creation", appID, userID, sourceID) continue } @@ -2113,3 +2124,249 @@ func (h *Hydrator) ForceCheckPendingData() { glog.V(3).Infof("Force checking pending data triggered externally") h.checkForPendingData() } + +// SetDataWatcher sets the DataWatcher reference for the serial pipeline +func (h *Hydrator) SetDataWatcher(dw *DataWatcher) { + h.dataWatcher = dw +} + +func (h *Hydrator) 
SetDataWatcherRepo(dwr *DataWatcherRepo) { + h.dataWatcherRepo = dwr +} + +func (h *Hydrator) SetStatusCorrectionChecker(scc *StatusCorrectionChecker) { + h.statusCorrectionChecker = scc +} + +// serialPipelineLoop runs the serial pipeline, triggered by notifications or periodic timer +func (h *Hydrator) serialPipelineLoop(ctx context.Context) { + glog.V(3).Info("Serial pipeline loop started") + defer glog.V(3).Info("Serial pipeline loop stopped") + + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-h.stopChan: + return + case <-h.pipelineTrigger: + h.runSerialPipeline(ctx) + case <-ticker.C: + h.runSerialPipeline(ctx) + } + } +} + +// runSerialPipeline collects all pending apps and processes them one by one +func (h *Hydrator) runSerialPipeline(ctx context.Context) { + if !h.pipelineMutex.TryLock() { + glog.V(3).Info("Serial pipeline: another run in progress, skipping") + return + } + defer h.pipelineMutex.Unlock() + + if h.cacheManager == nil { + return + } + + type pendingItem struct { + userID string + sourceID string + pending *types.AppInfoLatestPendingData + } + + // Step 1: Read-lock to snapshot all pending data + if !h.cacheManager.mutex.TryRLock() { + glog.Warning("[TryRLock] serialPipeline: CacheManager read lock not available, skipping") + return + } + var items []pendingItem + for userID, userData := range h.cache.Users { + for sourceID, sourceData := range userData.Sources { + for _, pd := range sourceData.AppInfoLatestPending { + if pd != nil { + items = append(items, pendingItem{userID, sourceID, pd}) + } + } + } + } + h.cacheManager.mutex.RUnlock() + + if len(items) == 0 { + return + } + + // glog.V(2).Infof("Serial pipeline: hydrator, found %d pending apps to process", len(items)) + if len(items) > 0 { + glog.V(2).Infof("Serial pipeline Phase 1: processing %d pending apps", len(items)) + } + + // Step 2: Process each app serially through the full pipeline + affectedUsers 
:= make(map[string]bool) + var total = len(items) + for idx, item := range items { + select { + case <-ctx.Done(): + return + case <-h.stopChan: + return + default: + } + + glog.V(2).Infof("Serial pipeline: user: %s, source: %s, id: %s, name: %s, %d/%d", item.userID, item.sourceID, item.pending.AppInfo.AppEntry.ID, item.pending.AppInfo.AppEntry.Name, idx+1, total) + h.processSingleAppFullPipeline(ctx, item.userID, item.sourceID, item.pending) // + + affectedUsers[item.userID] = true + } + + glog.V(2).Info("Serial pipeline done, continue...") + + // Step 3: Calculate hash and sync for affected users + if h.dataWatcher != nil { + for userID := range affectedUsers { + userData := h.cacheManager.GetUserData(userID) + if userData != nil { + h.dataWatcher.CalculateAndSetUserHashDirect(userID, userData) + } + } + } + + // ========== Phase 2: DataWatcherRepo ========== + if h.dataWatcherRepo != nil { + select { + case <-ctx.Done(): + return + case <-h.stopChan: + return + default: + } + glog.V(3).Info("Serial pipeline Phase 2: processing DataWatcherRepo") + h.dataWatcherRepo.ProcessOnce() + } + + // ========== Phase 3: StatusCorrectionChecker ========== + if h.statusCorrectionChecker != nil { + select { + case <-ctx.Done(): + return + case <-h.stopChan: + return + default: + } + glog.V(3).Info("Serial pipeline Phase 3: processing StatusCorrectionChecker") + h.statusCorrectionChecker.PerformStatusCheckOnce() + } + + // ========== Phase 4: Hash + Sync ========== + if h.dataWatcher != nil { + for userID := range affectedUsers { + userData := h.cacheManager.GetUserData(userID) + if userData != nil { + h.dataWatcher.CalculateAndSetUserHashDirect(userID, userData) + } + } + } + + // Step 4: Force sync + if err := h.cacheManager.ForceSync(); err != nil { + glog.Errorf("Serial pipeline: ForceSync failed: %v", err) + } +} + +// processSingleAppFullPipeline processes a single app through hydration + move to latest +func (h *Hydrator) processSingleAppFullPipeline(ctx context.Context, 
userID, sourceID string, pendingData *types.AppInfoLatestPendingData) { + if pendingData == nil || pendingData.RawData == nil { + return + } + + appID := pendingData.RawData.AppID + if appID == "" { + appID = pendingData.RawData.ID + } + appName := pendingData.RawData.Name + if appID == "" { + return + } + + // Skip if in render failed list + if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { + return + } + + // Check if already hydrated → move to latest directly + if h.isAppHydrationComplete(pendingData) { // + + if h.dataWatcher != nil { + h.dataWatcher.ProcessSingleAppToLatest(userID, sourceID, pendingData) // + processSingleAppFullPipeline + } + return + } + + // Skip if already in latest queue with matching version + version := "" + if pendingData.RawData != nil { + version = pendingData.RawData.Version + } + if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { + return + } + + // Create a task for hydration step execution + appDataMap := h.convertApplicationInfoEntryToMap(pendingData.RawData) + if len(appDataMap) == 0 { + return + } + + var cacheManagerIface types.CacheManagerInterface + if h.cacheManager != nil { + cacheManagerIface = h.cacheManager + } + task := hydrationfn.NewHydrationTaskWithManager( + userID, sourceID, appID, + appDataMap, h.cache, cacheManagerIface, h.settingsManager, + ) + + glog.V(2).Infof("Serial pipeline: processing app %s %s (user=%s, source=%s)", appID, appName, userID, sourceID) + taskStartTime := time.Now() + + // Execute hydration steps synchronously + for i, step := range h.steps { + _ = i + if step.CanSkip(ctx, task) { + task.IncrementStep() + continue + } + + if err := step.Execute(ctx, task); err != nil { + failureReason := err.Error() + failureStep := step.GetStepName() + glog.Errorf("Serial pipeline: step %s failed for app %s %s: %v", failureStep, appID, appName, err) + + h.moveTaskToRenderFailed(task, failureReason, failureStep) + duration := time.Since(taskStartTime) + 
h.markTaskFailed(task, taskStartTime, duration, failureStep, failureReason) + return + } + + task.IncrementStep() + } + + if !h.isAppHydrationComplete(pendingData) { + glog.Warningf("Serial pipeline: hydration steps completed but data incomplete for app %s %s (user=%s, source=%s), will retry next cycle", + appID, appName, userID, sourceID) + return + } + + // All steps completed + task.SetStatus(hydrationfn.TaskStatusCompleted) + duration := time.Since(taskStartTime) + h.markTaskCompleted(task, taskStartTime, duration) + + glog.V(2).Infof("Serial pipeline: hydration completed for app %s %s, moving to latest", appID, appName) + + // Move to Latest + if h.dataWatcher != nil { + h.dataWatcher.ProcessSingleAppToLatest(userID, sourceID, pendingData) // + processSingleAppFullPipeline + } +} diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index cfd5fc2..53e152a 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -107,6 +107,35 @@ func (scc *StatusCorrectionChecker) Start() error { return nil } +// StartWithOptions starts with options +func (scc *StatusCorrectionChecker) StartWithOptions(enablePeriodicCheck bool) error { + scc.mutex.Lock() + defer scc.mutex.Unlock() + + if scc.isRunning { + return fmt.Errorf("status correction checker is already running") + } + + scc.isRunning = true + + if enablePeriodicCheck { + glog.Infof("Starting status correction checker with interval: %v", scc.checkInterval) + go scc.runPeriodicCheck() + } else { + glog.Infof("Starting status correction checker in passive mode (serial pipeline handles processing)") + } + + return nil +} + +// PerformStatusCheckOnce executes one status check cycle, called by serial pipeline +func (scc *StatusCorrectionChecker) PerformStatusCheckOnce() { + if !scc.isRunning { + return + } + scc.performStatusCheck() +} + // Stop stops the periodic status checking func (scc 
*StatusCorrectionChecker) Stop() { scc.mutex.Lock() From e87f00de8a301fa777aa0473f1996cb0dbabc41e Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 28 Feb 2026 13:00:08 +0000 Subject: [PATCH 02/45] refactor: extract serial pipeline into standalone component - New pipeline.go: orchestrates Syncer -> Hydrator -> DataWatcherRepo -> StatusCorrectionChecker serially - Pipeline implements HydrationNotifier interface, replaces Hydrator as CacheManager's notifier - Hydrator: add HydrateSingleApp() public method, remove internal pipeline logic, passive mode - Syncer: add SyncOnce() and StartWithOptions() for Pipeline-driven scheduling - cache.go: add version filtering in AppInfoLatestPending - skip apps already in Latest with same version - datawatcher_state.go: fix RLock -> Lock deadlock in fetchInvisibleFromAppService - datawatcher_app.go: fix potential nil pointer panic in log, clean up commented code - appinfomodule.go: create Pipeline, wire all components, all timers disabled (Pipeline handles scheduling) Co-authored-by: aby913 --- internal/v2/appinfo/appinfomodule.go | 74 ++--- internal/v2/appinfo/cache.go | 127 +++++---- internal/v2/appinfo/datawatcher_app.go | 27 +- internal/v2/appinfo/datawatcher_state.go | 56 +--- internal/v2/appinfo/hydration.go | 285 +------------------ internal/v2/appinfo/pipeline.go | 345 +++++++++++++++++++++++ internal/v2/appinfo/syncer.go | 27 +- 7 files changed, 501 insertions(+), 440 deletions(-) create mode 100644 internal/v2/appinfo/pipeline.go diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index 8c5f459..420ddf9 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -26,10 +26,11 @@ type AppInfoModule struct { redisClient *RedisClient syncer *Syncer hydrator *Hydrator + pipeline *Pipeline dataWatcher *DataWatcher dataWatcherState *DataWatcherState dataWatcherUser *DataWatcherUser - dataWatcherRepo *DataWatcherRepo // Add DataWatcherRepo for image 
info updates + dataWatcherRepo *DataWatcherRepo dataSender *DataSender statusCorrectionChecker *StatusCorrectionChecker settingsManager *settings.SettingsManager @@ -223,10 +224,30 @@ func (m *AppInfoModule) Start() error { } } - // Set up hydration notifier connection if both cache and hydrator are enabled - if m.config.EnableCache && m.config.EnableHydrator && m.cacheManager != nil && m.hydrator != nil { - m.cacheManager.SetHydrationNotifier(m.hydrator) - glog.Infof("Hydration notifier connection established between cache manager and hydrator") + // Create and start Pipeline to orchestrate all components serially + if m.config.EnableHydrator && m.cacheManager != nil { + p := NewPipeline(m.cacheManager, m.cacheManager.cache, 30*time.Second) + if m.syncer != nil { + p.SetSyncer(m.syncer) + } + if m.hydrator != nil { + p.SetHydrator(m.hydrator) + } + if m.dataWatcher != nil { + p.SetDataWatcher(m.dataWatcher) + } + if m.dataWatcherRepo != nil { + p.SetDataWatcherRepo(m.dataWatcherRepo) + } + if m.statusCorrectionChecker != nil { + p.SetStatusCorrectionChecker(m.statusCorrectionChecker) + } + m.cacheManager.SetHydrationNotifier(p) + if err := p.Start(m.ctx); err != nil { + return fmt.Errorf("failed to start Pipeline: %w", err) + } + m.pipeline = p + glog.Infof("Pipeline started, all components orchestrated serially") } m.isStarted = true @@ -245,7 +266,11 @@ func (m *AppInfoModule) Stop() error { glog.V(3).Info("Stopping AppInfo module...") - // Stop components in reverse order + // Stop Pipeline first (it orchestrates other components) + if m.pipeline != nil { + m.pipeline.Stop() + } + if m.hydrator != nil { m.hydrator.Stop() } @@ -517,12 +542,12 @@ func (m *AppInfoModule) initSyncer() error { glog.V(3).Info("Cache manager reference set in syncer for hydration notifications") } - // Start syncer - if err := m.syncer.Start(m.ctx); err != nil { + // Start syncer in passive mode (Pipeline handles scheduling) + if err := m.syncer.StartWithOptions(m.ctx, false); err != 
nil { return fmt.Errorf("failed to start syncer: %w", err) } - glog.V(2).Info("Syncer initialized successfully") + glog.V(2).Info("Syncer initialized (passive mode, Pipeline handles scheduling)") return nil } @@ -583,18 +608,11 @@ func (m *AppInfoModule) initDataWatcher() error { // Create DataWatcher instance m.dataWatcher = NewDataWatcher(m.cacheManager, m.hydrator, m.dataSender) - // Start DataWatcher - // if err := m.dataWatcher.Start(m.ctx); err != nil { - // return fmt.Errorf("failed to start DataWatcher: %w", err) - // } if err := m.dataWatcher.StartWithOptions(m.ctx, false); err != nil { return fmt.Errorf("failed to start DataWatcher: %w", err) } - // Wire DataWatcher into Hydrator's serial pipeline - m.hydrator.SetDataWatcher(m.dataWatcher) - - glog.V(2).Info("DataWatcher initialized successfully") + glog.V(2).Info("DataWatcher initialized (passive mode)") return nil } @@ -653,20 +671,11 @@ func (m *AppInfoModule) initDataWatcherRepo() error { // Create DataWatcherRepo instance m.dataWatcherRepo = NewDataWatcherRepo(m.redisClient, m.cacheManager, m.dataWatcher, m.dataSender) - // Start DataWatcherRepo - // if err := m.dataWatcherRepo.Start(); err != nil { - // return fmt.Errorf("failed to start DataWatcherRepo: %w", err) - // } - if err := m.dataWatcherRepo.StartWithOptions(false); err != nil { return fmt.Errorf("failed to start DataWatcherRepo: %w", err) } - if m.hydrator != nil { - m.hydrator.SetDataWatcherRepo(m.dataWatcherRepo) - } - - glog.V(2).Info("DataWatcherRepo initialized successfully") + glog.V(2).Info("DataWatcherRepo initialized (passive mode)") return nil } @@ -680,20 +689,11 @@ func (m *AppInfoModule) initStatusCorrectionChecker() error { m.statusCorrectionChecker = NewStatusCorrectionChecker(m.cacheManager) - // Start StatusCorrectionChecker - // if err := m.statusCorrectionChecker.Start(); err != nil { - // return fmt.Errorf("failed to start StatusCorrectionChecker: %w", err) - // } - if err := 
m.statusCorrectionChecker.StartWithOptions(false); err != nil { return fmt.Errorf("failed to start StatusCorrectionChecker: %w", err) } - if m.hydrator != nil { - m.hydrator.SetStatusCorrectionChecker(m.statusCorrectionChecker) - } - - glog.V(2).Info("StatusCorrectionChecker initialized successfully") + glog.V(2).Info("StatusCorrectionChecker initialized (passive mode)") return nil } diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index b5e38e8..0036302 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -897,18 +897,57 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App appData.Timestamp = time.Now().Unix() sourceData.AppInfoLatest = append(sourceData.AppInfoLatest, appData) case AppInfoLatestPending: - // Clear existing AppInfoLatestPending list before adding new data - // This ensures we don't accumulate old data when hash doesn't match + // Build version map from AppInfoLatest to skip apps with unchanged versions + latestVersionMap := make(map[string]string) + for _, latestApp := range sourceData.AppInfoLatest { + if latestApp == nil || latestApp.RawData == nil { + continue + } + v := latestApp.RawData.Version + if v == "" { + continue + } + if latestApp.RawData.Name != "" { + latestVersionMap[latestApp.RawData.Name] = v + } + if latestApp.RawData.AppID != "" { + latestVersionMap[latestApp.RawData.AppID] = v + } + if latestApp.RawData.ID != "" { + latestVersionMap[latestApp.RawData.ID] = v + } + } + originalCount := len(sourceData.AppInfoLatestPending) - sourceData.AppInfoLatestPending = sourceData.AppInfoLatestPending[:0] // Clear the slice + sourceData.AppInfoLatestPending = sourceData.AppInfoLatestPending[:0] glog.V(3).Infof("Cleared %d existing AppInfoLatestPending entries for user=%s, source=%s", originalCount, userID, sourceID) - // Check if this is a complete market data structure + shouldSkipApp := func(appData *AppInfoLatestPendingData) bool { + if appData == nil || 
appData.RawData == nil { + return false + } + incomingVersion := appData.RawData.Version + if incomingVersion == "" { + return false + } + if name := appData.RawData.Name; name != "" { + if existing, ok := latestVersionMap[name]; ok && existing == incomingVersion { + return true + } + } + if id := appData.RawData.AppID; id != "" { + if existing, ok := latestVersionMap[id]; ok && existing == incomingVersion { + return true + } + } + return false + } + + skippedCount := 0 + if appsData, hasApps := data["apps"].(map[string]interface{}); hasApps { - // This is complete market data, extract individual apps glog.V(3).Infof("Processing complete market data with %d apps for user=%s, source=%s", len(appsData), userID, sourceID) - // Also store the "others" data (hash, version, topics, etc.) others := &types.Others{} if version, ok := data["version"].(string); ok { others.Version = version @@ -916,8 +955,6 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App if hash, ok := data["hash"].(string); ok { others.Hash = hash } - - // Extract topics, recommends, pages if present if topics, ok := data["topics"].(map[string]interface{}); ok { for _, topicData := range topics { if topicMap, ok := topicData.(map[string]interface{}); ok { @@ -925,7 +962,6 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App if name, ok := topicMap["name"].(string); ok { topic.Name = name } - // Extract topic data if present if data, ok := topicMap["data"].(map[string]interface{}); ok { topic.Data = make(map[string]*types.TopicData) for lang, topicDataInterface := range data { @@ -942,77 +978,66 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App } } } - - // Store others data in source sourceData.Others = others - // Process each individual app for appID, appDataInterface := range appsData { if appDataMap, ok := appDataInterface.(map[string]interface{}); ok { - glog.V(3).Infof("DEBUG: CALL POINT 1 - Processing 
app %s for user=%s, source=%s", appID, userID, sourceID) - glog.V(3).Infof("DEBUG: CALL POINT 1 - App data before calling NewAppInfoLatestPendingDataFromLegacyData: %+v", appDataMap) appData := NewAppInfoLatestPendingDataFromLegacyData(appDataMap) - if appData != nil { - appData.Timestamp = time.Now().Unix() - sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) - glog.V(2).Infof("Added app %s for user=%s, source=%s", appID, userID, sourceID) - } else { - glog.Warningf("Failed to create app data for app %s (user=%s, source=%s)", appID, userID, sourceID) + if appData == nil { + continue } + if shouldSkipApp(appData) { + skippedCount++ + continue + } + appData.Timestamp = time.Now().Unix() + sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) + glog.V(3).Infof("Added app %s for user=%s, source=%s", appID, userID, sourceID) } } - - glog.V(3).Infof("Successfully processed %d apps from market data for user=%s, source=%s", len(sourceData.AppInfoLatestPending), userID, sourceID) - } else { // + 走这里 - // This might be market data with nested apps structure, try to extract apps - glog.V(3).Infof("DEBUG: CALL POINT 2 - Processing potential market data for user=%s, source=%s", userID, sourceID) - glog.V(3).Infof("DEBUG: CALL POINT 2 - Data before processing: %+v", data) - - // Check if this is market data with nested structure + } else { if dataSection, hasData := data["data"].(map[string]interface{}); hasData { if appsData, hasApps := dataSection["apps"].(map[string]interface{}); hasApps { - // This is market data with apps - process each app individually - glog.V(3).Infof("DEBUG: CALL POINT 2 - Found nested apps structure with %d apps", len(appsData)) for appID, appDataInterface := range appsData { if appDataMap, ok := appDataInterface.(map[string]interface{}); ok { - glog.V(3).Infof("DEBUG: CALL POINT 2 - Processing app %s", appID) appData := NewAppInfoLatestPendingDataFromLegacyData(appDataMap) - if appData != 
nil { - appData.Timestamp = time.Now().Unix() - sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) - glog.V(3).Infof("Added app %s for user=%s, source=%s", appID, userID, sourceID) - } else { - glog.Warningf("Failed to create app data for app %s (user=%s, source=%s)", appID, userID, sourceID) + if appData == nil { + continue + } + if shouldSkipApp(appData) { + skippedCount++ + continue } + appData.Timestamp = time.Now().Unix() + sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) + glog.V(3).Infof("Added app %s for user=%s, source=%s", appID, userID, sourceID) } } - glog.V(2).Infof("Successfully processed %d apps from nested market data for user=%s, source=%s", len(sourceData.AppInfoLatestPending), userID, sourceID) } else { glog.Warningf("Market data found but no apps section for user=%s, source=%s", userID, sourceID) } } else { - // This might be actual single app data, try to process directly - glog.V(3).Infof("DEBUG: CALL POINT 2 - Trying as single app data for user=%s, source=%s", userID, sourceID) appData := NewAppInfoLatestPendingDataFromLegacyData(data) if appData == nil { - glog.Warningf("Failed to create AppInfoLatestPendingData from data for user=%s, source=%s - not recognized as app data or market data", userID, sourceID) + glog.Warningf("Failed to create AppInfoLatestPendingData for user=%s, source=%s", userID, sourceID) return fmt.Errorf("invalid app data: missing required identifiers (id, name, or appID)") } - - appData.Timestamp = time.Now().Unix() - sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) - glog.V(2).Infof("Successfully processed single app data for user=%s, source=%s", userID, sourceID) + if !shouldSkipApp(appData) { + appData.Timestamp = time.Now().Unix() + sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) + } else { + skippedCount++ + } } } - glog.V(2).Infof("Updated AppInfoLatestPending list with %d new 
entries for user=%s, source=%s", - len(sourceData.AppInfoLatestPending), userID, sourceID) + glog.V(2).Infof("Updated AppInfoLatestPending: %d new, %d skipped (unchanged version) for user=%s, source=%s", + len(sourceData.AppInfoLatestPending), skippedCount, userID, sourceID) - // Notify hydrator about pending data update for immediate task creation if cm.hydrationNotifier != nil && len(sourceData.AppInfoLatestPending) > 0 { - glog.V(3).Infof("Notifying hydrator about pending data update for user=%s, source=%s", userID, sourceID) - glog.V(2).Info("Serial pipeline, syncer done, continue...") - go cm.hydrationNotifier.NotifyPendingDataUpdate(userID, sourceID, data) // +++++ + glog.V(2).Infof("Notifying pipeline about %d pending apps for user=%s, source=%s", + len(sourceData.AppInfoLatestPending), userID, sourceID) + go cm.hydrationNotifier.NotifyPendingDataUpdate(userID, sourceID, data) } case types.AppRenderFailed: // Handle render failed data - this is typically set by the hydrator when tasks fail diff --git a/internal/v2/appinfo/datawatcher_app.go b/internal/v2/appinfo/datawatcher_app.go index ce828df..3d12bbf 100644 --- a/internal/v2/appinfo/datawatcher_app.go +++ b/internal/v2/appinfo/datawatcher_app.go @@ -51,26 +51,6 @@ func NewDataWatcher(cacheManager *CacheManager, hydrator *Hydrator, dataSender * // Start begins the data watching process func (dw *DataWatcher) Start(ctx context.Context) error { - // if atomic.LoadInt32(&dw.isRunning) == 1 { - // return fmt.Errorf("DataWatcher is already running") - // } - - // if dw.cacheManager == nil { - // return fmt.Errorf("CacheManager is required for DataWatcher") - // } - - // if dw.hydrator == nil { - // return fmt.Errorf("Hydrator is required for DataWatcher") - // } - - // atomic.StoreInt32(&dw.isRunning, 1) - // glog.Infof("Starting DataWatcher with interval: %v", time.Duration(atomic.LoadInt64((*int64)(&dw.interval)))) - - // // Start the monitoring goroutine - // go dw.watchLoop(ctx) - - // return nil - return 
dw.StartWithOptions(ctx, true) } @@ -1293,7 +1273,9 @@ func (dw *DataWatcher) ProcessSingleAppToLatest(userID, sourceID string, pending return false } - glog.V(2).Infof("Serial pipeline, datawatcher_app user: %s, source: %s, id: %s, name: %s", userID, sourceID, pendingApp.AppInfo.AppEntry.ID, pendingApp.AppInfo.AppEntry.Name) + appID := dw.getAppID(pendingApp) + appName := dw.getAppName(pendingApp) + glog.V(2).Infof("Pipeline: ProcessSingleAppToLatest user=%s, source=%s, id=%s, name=%s", userID, sourceID, appID, appName) // Acquire write lock to move data if !dw.cacheManager.mutex.TryLock() { @@ -1311,9 +1293,6 @@ func (dw *DataWatcher) ProcessSingleAppToLatest(userID, sourceID string, pending return false } - appName := dw.getAppName(pendingApp) - appID := dw.getAppID(pendingApp) - // Check if app with same name already exists in AppInfoLatest existingIndex := -1 for i, existingApp := range sourceData.AppInfoLatest { diff --git a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index bf17127..0de0be8 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -156,42 +156,27 @@ func (dw *DataWatcherState) resolveInvisibleFlag(raw *bool, entranceName, appNam // fetchInvisibleFromAppService fetches invisible flag from app-service API's spec.entrances // Uses caching to avoid repeated API calls for the same app func (dw *DataWatcherState) fetchInvisibleFromAppService(appName, userID, entranceName string) (bool, error) { - // Check cache first (using TryRLock to avoid blocking) cacheKey := fmt.Sprintf("%s:%s", userID, appName) - dw.appServiceCacheMutex.RLock() - defer dw.appServiceCacheMutex.RUnlock() + // Check cache first (short read lock) + dw.appServiceCacheMutex.RLock() if appCache, exists := dw.appServiceCache[cacheKey]; exists { if invisible, found := appCache[entranceName]; found { - // dw.appServiceCacheMutex.RUnlock() - glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - using 
cached invisible=%t for entrance %s (app=%s, user=%s)", + dw.appServiceCacheMutex.RUnlock() + glog.V(3).Infof("fetchInvisibleFromAppService - cached invisible=%t for entrance %s (app=%s, user=%s)", invisible, entranceName, appName, userID) return invisible, nil } } + dw.appServiceCacheMutex.RUnlock() - // if dw.appServiceCacheMutex.TryRLock() { - // if appCache, exists := dw.appServiceCache[cacheKey]; exists { - // if invisible, found := appCache[entranceName]; found { - // dw.appServiceCacheMutex.RUnlock() - // glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - using cached invisible=%t for entrance %s (app=%s, user=%s)", - // invisible, entranceName, appName, userID) - // return invisible, nil - // } - // } - // dw.appServiceCacheMutex.RUnlock() - // } else { - // glog.Warningf("[TryRLock] DEBUG: fetchInvisibleFromAppService - read lock not available, skipping cache check for entrance %s (app=%s, user=%s)", - // entranceName, appName, userID) - // } - - // Fetch from API + // Fetch from API (no lock held) host := getEnvOrDefault("APP_SERVICE_SERVICE_HOST", "localhost") port := getEnvOrDefault("APP_SERVICE_SERVICE_PORT", "80") url := fmt.Sprintf("http://%s:%s/app-service/v1/all/apps", host, port) client := &http.Client{ - Timeout: 5 * time.Second, // Short timeout to avoid blocking + Timeout: 5 * time.Second, } resp, err := client.Get(url) @@ -214,10 +199,8 @@ func (dw *DataWatcherState) fetchInvisibleFromAppService(appName, userID, entran return false, fmt.Errorf("failed to parse app-service response: %v", err) } - // Find the app matching appName and userID for _, app := range apps { if app.Spec.Name == appName && app.Spec.Owner == userID { - // Find the entrance in spec.entrances first var foundInvisible bool var invisibleValue bool for _, specEntrance := range app.Spec.Entrances { @@ -232,35 +215,18 @@ func (dw *DataWatcherState) fetchInvisibleFromAppService(appName, userID, entran return false, fmt.Errorf("entrance %s not found in spec.entrances for app 
%s", entranceName, appName) } - // Cache all entrances for this app to avoid future API calls (using TryLock to avoid blocking) + // Write cache (separate write lock, no read lock held) dw.appServiceCacheMutex.Lock() - defer dw.appServiceCacheMutex.Unlock() - if dw.appServiceCache[cacheKey] == nil { dw.appServiceCache[cacheKey] = make(map[string]bool) } for _, specEntrance := range app.Spec.Entrances { dw.appServiceCache[cacheKey][specEntrance.Name] = specEntrance.Invisible } - // dw.appServiceCacheMutex.Unlock() - glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - fetched and cached invisible=%t for entrance %s (app=%s, user=%s)", - invisibleValue, entranceName, appName, userID) - - // if dw.appServiceCacheMutex.TryLock() { - // if dw.appServiceCache[cacheKey] == nil { - // dw.appServiceCache[cacheKey] = make(map[string]bool) - // } - // for _, specEntrance := range app.Spec.Entrances { - // dw.appServiceCache[cacheKey][specEntrance.Name] = specEntrance.Invisible - // } - // dw.appServiceCacheMutex.Unlock() - // glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - fetched and cached invisible=%t for entrance %s (app=%s, user=%s)", - // invisibleValue, entranceName, appName, userID) - // } else { - // glog.Warningf("[TryLock] DEBUG: fetchInvisibleFromAppService - write lock not available, skipping cache update for entrance %s (app=%s, user=%s)", - // entranceName, appName, userID) - // } + dw.appServiceCacheMutex.Unlock() + glog.V(3).Infof("fetchInvisibleFromAppService - fetched and cached invisible=%t for entrance %s (app=%s, user=%s)", + invisibleValue, entranceName, appName, userID) return invisibleValue, nil } } diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index 1306121..7ebc528 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -35,13 +35,6 @@ type Hydrator struct { failedTasks map[string]*hydrationfn.HydrationTask taskMutex sync.RWMutex - // Serial pipeline (replaces parallel 
workers + pendingDataMonitor + DataWatcher.watchLoop) - dataWatcher *DataWatcher - dataWatcherRepo *DataWatcherRepo - statusCorrectionChecker *StatusCorrectionChecker - pipelineTrigger chan struct{} - pipelineMutex sync.Mutex - // Cache access mutex for unified lock strategy - removed, use CacheManager.mutex instead // Batch completion tracking @@ -119,7 +112,6 @@ func NewHydrator(cache *types.CacheData, settingsManager *settings.SettingsManag settingsManager: settingsManager, cacheManager: cacheManager, taskQueue: make(chan *hydrationfn.HydrationTask, config.QueueSize), - pipelineTrigger: make(chan struct{}, 1), workerCount: config.WorkerCount, stopChan: make(chan struct{}), isRunning: atomic.Bool{}, // Initialize atomic.Bool @@ -170,30 +162,17 @@ func (h *Hydrator) AddStep(step hydrationfn.HydrationStep) { h.steps = append(h.steps, step) } -// Start begins the hydration process with workers +// Start begins the hydration process in passive mode (Pipeline handles scheduling) func (h *Hydrator) Start(ctx context.Context) error { if h.isRunning.Load() { return fmt.Errorf("hydrator is already running") } h.isRunning.Store(true) - glog.V(3).Infof("Starting hydrator with %d workers and %d steps", h.workerCount, len(h.steps)) - - // Start worker goroutines - // for i := 0; i < h.workerCount; i++ { - // go h.worker(ctx, i) - // } - - // // Start pending data monitor - // go h.pendingDataMonitor(ctx) - - // Start serial pipeline loop (replaces worker pool + pendingDataMonitor + DataWatcher watchLoop) - go h.serialPipelineLoop(ctx) + glog.V(3).Infof("Starting hydrator with %d steps (passive mode, Pipeline handles scheduling)", len(h.steps)) - // Start batch completion processor go h.batchCompletionProcessor(ctx) - // Start database sync monitor if cache manager is available if h.cacheManager != nil { go h.databaseSyncMonitor(ctx) } @@ -1380,18 +1359,11 @@ func CreateDefaultHydrator(cache *types.CacheData, settingsManager *settings.Set return NewHydrator(cache, 
settingsManager, cacheManager, config) } -// NotifyPendingDataUpdate implements HydrationNotifier interface -// Processes pending data update notification and creates hydration tasks immediately +// NotifyPendingDataUpdate implements HydrationNotifier interface. +// In the new architecture, Pipeline handles notifications directly. +// This method is kept for backward compatibility but does nothing. func (h *Hydrator) NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) { - if !h.IsRunning() { - glog.V(2).Infof("Hydrator is not running, ignoring pending data notification for user: %s, source: %s", userID, sourceID) - return - } - - glog.V(2).Infof("Received pending data update notification for user: %s, source: %s", userID, sourceID) - - // Create tasks from the pending data immediately - h.createTasksFromPendingDataMap(userID, sourceID, pendingData) + glog.V(3).Infof("Hydrator.NotifyPendingDataUpdate: Pipeline handles notifications, user=%s, source=%s", userID, sourceID) } // createTasksFromPendingDataMap creates hydration tasks from pending data map @@ -2125,248 +2097,3 @@ func (h *Hydrator) ForceCheckPendingData() { h.checkForPendingData() } -// SetDataWatcher sets the DataWatcher reference for the serial pipeline -func (h *Hydrator) SetDataWatcher(dw *DataWatcher) { - h.dataWatcher = dw -} - -func (h *Hydrator) SetDataWatcherRepo(dwr *DataWatcherRepo) { - h.dataWatcherRepo = dwr -} - -func (h *Hydrator) SetStatusCorrectionChecker(scc *StatusCorrectionChecker) { - h.statusCorrectionChecker = scc -} - -// serialPipelineLoop runs the serial pipeline, triggered by notifications or periodic timer -func (h *Hydrator) serialPipelineLoop(ctx context.Context) { - glog.V(3).Info("Serial pipeline loop started") - defer glog.V(3).Info("Serial pipeline loop stopped") - - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-h.stopChan: - return - case <-h.pipelineTrigger: - 
h.runSerialPipeline(ctx) - case <-ticker.C: - h.runSerialPipeline(ctx) - } - } -} - -// runSerialPipeline collects all pending apps and processes them one by one -func (h *Hydrator) runSerialPipeline(ctx context.Context) { - if !h.pipelineMutex.TryLock() { - glog.V(3).Info("Serial pipeline: another run in progress, skipping") - return - } - defer h.pipelineMutex.Unlock() - - if h.cacheManager == nil { - return - } - - type pendingItem struct { - userID string - sourceID string - pending *types.AppInfoLatestPendingData - } - - // Step 1: Read-lock to snapshot all pending data - if !h.cacheManager.mutex.TryRLock() { - glog.Warning("[TryRLock] serialPipeline: CacheManager read lock not available, skipping") - return - } - var items []pendingItem - for userID, userData := range h.cache.Users { - for sourceID, sourceData := range userData.Sources { - for _, pd := range sourceData.AppInfoLatestPending { - if pd != nil { - items = append(items, pendingItem{userID, sourceID, pd}) - } - } - } - } - h.cacheManager.mutex.RUnlock() - - if len(items) == 0 { - return - } - - // glog.V(2).Infof("Serial pipeline: hydrator, found %d pending apps to process", len(items)) - if len(items) > 0 { - glog.V(2).Infof("Serial pipeline Phase 1: processing %d pending apps", len(items)) - } - - // Step 2: Process each app serially through the full pipeline - affectedUsers := make(map[string]bool) - var total = len(items) - for idx, item := range items { - select { - case <-ctx.Done(): - return - case <-h.stopChan: - return - default: - } - - glog.V(2).Infof("Serial pipeline: user: %s, source: %s, id: %s, name: %s, %d/%d", item.userID, item.sourceID, item.pending.AppInfo.AppEntry.ID, item.pending.AppInfo.AppEntry.Name, idx+1, total) - h.processSingleAppFullPipeline(ctx, item.userID, item.sourceID, item.pending) // + - affectedUsers[item.userID] = true - } - - glog.V(2).Info("Serial pipeline done, continue...") - - // Step 3: Calculate hash and sync for affected users - if h.dataWatcher != nil { 
- for userID := range affectedUsers { - userData := h.cacheManager.GetUserData(userID) - if userData != nil { - h.dataWatcher.CalculateAndSetUserHashDirect(userID, userData) - } - } - } - - // ========== Phase 2: DataWatcherRepo ========== - if h.dataWatcherRepo != nil { - select { - case <-ctx.Done(): - return - case <-h.stopChan: - return - default: - } - glog.V(3).Info("Serial pipeline Phase 2: processing DataWatcherRepo") - h.dataWatcherRepo.ProcessOnce() - } - - // ========== Phase 3: StatusCorrectionChecker ========== - if h.statusCorrectionChecker != nil { - select { - case <-ctx.Done(): - return - case <-h.stopChan: - return - default: - } - glog.V(3).Info("Serial pipeline Phase 3: processing StatusCorrectionChecker") - h.statusCorrectionChecker.PerformStatusCheckOnce() - } - - // ========== Phase 4: Hash + Sync ========== - if h.dataWatcher != nil { - for userID := range affectedUsers { - userData := h.cacheManager.GetUserData(userID) - if userData != nil { - h.dataWatcher.CalculateAndSetUserHashDirect(userID, userData) - } - } - } - - // Step 4: Force sync - if err := h.cacheManager.ForceSync(); err != nil { - glog.Errorf("Serial pipeline: ForceSync failed: %v", err) - } -} - -// processSingleAppFullPipeline processes a single app through hydration + move to latest -func (h *Hydrator) processSingleAppFullPipeline(ctx context.Context, userID, sourceID string, pendingData *types.AppInfoLatestPendingData) { - if pendingData == nil || pendingData.RawData == nil { - return - } - - appID := pendingData.RawData.AppID - if appID == "" { - appID = pendingData.RawData.ID - } - appName := pendingData.RawData.Name - if appID == "" { - return - } - - // Skip if in render failed list - if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - return - } - - // Check if already hydrated → move to latest directly - if h.isAppHydrationComplete(pendingData) { // + - if h.dataWatcher != nil { - h.dataWatcher.ProcessSingleAppToLatest(userID, sourceID, pendingData) 
// + processSingleAppFullPipeline - } - return - } - - // Skip if already in latest queue with matching version - version := "" - if pendingData.RawData != nil { - version = pendingData.RawData.Version - } - if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { - return - } - - // Create a task for hydration step execution - appDataMap := h.convertApplicationInfoEntryToMap(pendingData.RawData) - if len(appDataMap) == 0 { - return - } - - var cacheManagerIface types.CacheManagerInterface - if h.cacheManager != nil { - cacheManagerIface = h.cacheManager - } - task := hydrationfn.NewHydrationTaskWithManager( - userID, sourceID, appID, - appDataMap, h.cache, cacheManagerIface, h.settingsManager, - ) - - glog.V(2).Infof("Serial pipeline: processing app %s %s (user=%s, source=%s)", appID, appName, userID, sourceID) - taskStartTime := time.Now() - - // Execute hydration steps synchronously - for i, step := range h.steps { - _ = i - if step.CanSkip(ctx, task) { - task.IncrementStep() - continue - } - - if err := step.Execute(ctx, task); err != nil { - failureReason := err.Error() - failureStep := step.GetStepName() - glog.Errorf("Serial pipeline: step %s failed for app %s %s: %v", failureStep, appID, appName, err) - - h.moveTaskToRenderFailed(task, failureReason, failureStep) - duration := time.Since(taskStartTime) - h.markTaskFailed(task, taskStartTime, duration, failureStep, failureReason) - return - } - - task.IncrementStep() - } - - if !h.isAppHydrationComplete(pendingData) { - glog.Warningf("Serial pipeline: hydration steps completed but data incomplete for app %s %s (user=%s, source=%s), will retry next cycle", - appID, appName, userID, sourceID) - return - } - - // All steps completed - task.SetStatus(hydrationfn.TaskStatusCompleted) - duration := time.Since(taskStartTime) - h.markTaskCompleted(task, taskStartTime, duration) - - glog.V(2).Infof("Serial pipeline: hydration completed for app %s %s, moving to latest", appID, appName) - - // Move to 
Latest - if h.dataWatcher != nil { - h.dataWatcher.ProcessSingleAppToLatest(userID, sourceID, pendingData) // + processSingleAppFullPipeline - } -} diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go new file mode 100644 index 0000000..5abbbf6 --- /dev/null +++ b/internal/v2/appinfo/pipeline.go @@ -0,0 +1,345 @@ +package appinfo + +import ( + "context" + "sync" + "sync/atomic" + "time" + + "market/internal/v2/appinfo/hydrationfn" + "market/internal/v2/types" + + "github.com/golang/glog" +) + +// Pipeline orchestrates the serial execution of all data processing phases: +// +// Phase 1: Syncer - fetch remote app data +// Phase 2: Hydrator - process pending apps (hydration + move to Latest) +// Phase 3: DataWatcherRepo - process chart-repo state changes +// Phase 4: StatusCorrectionChecker - correct app running statuses +// Phase 5: Hash calculation + ForceSync +type Pipeline struct { + cacheManager *CacheManager + cache *types.CacheData + syncer *Syncer + hydrator *Hydrator + dataWatcher *DataWatcher + dataWatcherRepo *DataWatcherRepo + statusCorrectionChecker *StatusCorrectionChecker + + trigger chan struct{} + mutex sync.Mutex + stopChan chan struct{} + isRunning atomic.Bool + interval time.Duration +} + +func NewPipeline(cacheManager *CacheManager, cache *types.CacheData, interval time.Duration) *Pipeline { + if interval <= 0 { + interval = 30 * time.Second + } + return &Pipeline{ + cacheManager: cacheManager, + cache: cache, + trigger: make(chan struct{}, 1), + stopChan: make(chan struct{}), + interval: interval, + } +} + +func (p *Pipeline) SetSyncer(s *Syncer) { p.syncer = s } +func (p *Pipeline) SetHydrator(h *Hydrator) { p.hydrator = h } +func (p *Pipeline) SetDataWatcher(dw *DataWatcher) { p.dataWatcher = dw } +func (p *Pipeline) SetDataWatcherRepo(dwr *DataWatcherRepo) { p.dataWatcherRepo = dwr } +func (p *Pipeline) SetStatusCorrectionChecker(scc *StatusCorrectionChecker) { p.statusCorrectionChecker = scc } + +// 
NotifyPendingDataUpdate implements HydrationNotifier interface. +// Called by CacheManager after new pending data is written. +func (p *Pipeline) NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) { + if !p.isRunning.Load() { + return + } + glog.V(2).Infof("Pipeline: pending data notification received for user=%s, source=%s", userID, sourceID) + select { + case p.trigger <- struct{}{}: + default: + } +} + +func (p *Pipeline) Start(ctx context.Context) error { + if p.isRunning.Load() { + return nil + } + p.isRunning.Store(true) + go p.loop(ctx) + glog.Infof("Pipeline started with interval %v", p.interval) + return nil +} + +func (p *Pipeline) Stop() { + if !p.isRunning.Load() { + return + } + close(p.stopChan) + p.isRunning.Store(false) + glog.Info("Pipeline stopped") +} + +func (p *Pipeline) Trigger() { + select { + case p.trigger <- struct{}{}: + default: + } +} + +func (p *Pipeline) loop(ctx context.Context) { + glog.Info("Pipeline loop started") + defer glog.Info("Pipeline loop stopped") + + ticker := time.NewTicker(p.interval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-p.stopChan: + return + case <-p.trigger: + p.run(ctx) + case <-ticker.C: + p.run(ctx) + } + } +} + +func (p *Pipeline) run(ctx context.Context) { + if !p.mutex.TryLock() { + glog.V(3).Info("Pipeline: another run in progress, skipping") + return + } + defer p.mutex.Unlock() + + startTime := time.Now() + + p.phaseSyncer(ctx) + affectedUsers := p.phaseHydrateApps(ctx) + p.phaseDataWatcherRepo(ctx) + p.phaseStatusCorrection(ctx) + p.phaseHashAndSync(affectedUsers) + + if elapsed := time.Since(startTime); elapsed > 5*time.Second { + glog.V(2).Infof("Pipeline: cycle completed in %v", elapsed) + } +} + +// phaseSyncer fetches remote data +func (p *Pipeline) phaseSyncer(ctx context.Context) { + if p.syncer == nil { + return + } + select { + case <-ctx.Done(): + return + case <-p.stopChan: + return + default: + } + 
glog.V(3).Info("Pipeline Phase 1: Syncer") + p.syncer.SyncOnce(ctx) +} + +// phaseHydrateApps processes pending apps one by one through hydration + move to Latest +func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { + affectedUsers := make(map[string]bool) + if p.hydrator == nil || p.cacheManager == nil { + return affectedUsers + } + + type pendingItem struct { + userID string + sourceID string + pending *types.AppInfoLatestPendingData + } + + p.cacheManager.mutex.RLock() + var items []pendingItem + for userID, userData := range p.cache.Users { + for sourceID, sourceData := range userData.Sources { + for _, pd := range sourceData.AppInfoLatestPending { + if pd != nil { + items = append(items, pendingItem{userID, sourceID, pd}) + } + } + } + } + p.cacheManager.mutex.RUnlock() + + if len(items) == 0 { + return affectedUsers + } + + total := len(items) + glog.V(2).Infof("Pipeline Phase 2: processing %d pending apps", total) + + for idx, item := range items { + select { + case <-ctx.Done(): + return affectedUsers + case <-p.stopChan: + return affectedUsers + default: + } + + appID, appName := getAppIdentifiers(item.pending) + glog.V(2).Infof("Pipeline Phase 2: [%d/%d] %s %s (user=%s, source=%s)", + idx+1, total, appID, appName, item.userID, item.sourceID) + + hydrated := p.hydrator.HydrateSingleApp(ctx, item.userID, item.sourceID, item.pending) + if hydrated && p.dataWatcher != nil { + p.dataWatcher.ProcessSingleAppToLatest(item.userID, item.sourceID, item.pending) + } + affectedUsers[item.userID] = true + } + + return affectedUsers +} + +// phaseDataWatcherRepo processes chart-repo state changes +func (p *Pipeline) phaseDataWatcherRepo(ctx context.Context) { + if p.dataWatcherRepo == nil { + return + } + select { + case <-ctx.Done(): + return + case <-p.stopChan: + return + default: + } + glog.V(3).Info("Pipeline Phase 3: DataWatcherRepo") + p.dataWatcherRepo.ProcessOnce() +} + +// phaseStatusCorrection corrects app running statuses +func (p 
*Pipeline) phaseStatusCorrection(ctx context.Context) { + if p.statusCorrectionChecker == nil { + return + } + select { + case <-ctx.Done(): + return + case <-p.stopChan: + return + default: + } + glog.V(3).Info("Pipeline Phase 4: StatusCorrectionChecker") + p.statusCorrectionChecker.PerformStatusCheckOnce() +} + +// phaseHashAndSync calculates user hashes and syncs to Redis +func (p *Pipeline) phaseHashAndSync(affectedUsers map[string]bool) { + if p.dataWatcher != nil && len(affectedUsers) > 0 { + for userID := range affectedUsers { + userData := p.cacheManager.GetUserData(userID) + if userData != nil { + p.dataWatcher.CalculateAndSetUserHashDirect(userID, userData) + } + } + } + if p.cacheManager != nil { + if err := p.cacheManager.ForceSync(); err != nil { + glog.Errorf("Pipeline: ForceSync failed: %v", err) + } + } +} + +func getAppIdentifiers(pd *types.AppInfoLatestPendingData) (string, string) { + if pd == nil || pd.RawData == nil { + return "unknown", "unknown" + } + appID := pd.RawData.AppID + if appID == "" { + appID = pd.RawData.ID + } + return appID, pd.RawData.Name +} + +// HydrateSingleApp runs hydration steps for a single app synchronously. +// Returns true if hydration completed and data is ready for move to Latest. 
+func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string, pendingData *types.AppInfoLatestPendingData) bool { + if pendingData == nil || pendingData.RawData == nil { + return false + } + + appID := pendingData.RawData.AppID + if appID == "" { + appID = pendingData.RawData.ID + } + appName := pendingData.RawData.Name + if appID == "" { + return false + } + + if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { + return false + } + + if h.isAppHydrationComplete(pendingData) { + return true + } + + version := "" + if pendingData.RawData != nil { + version = pendingData.RawData.Version + } + if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { + return false + } + + appDataMap := h.convertApplicationInfoEntryToMap(pendingData.RawData) + if len(appDataMap) == 0 { + return false + } + + var cacheManagerIface types.CacheManagerInterface + if h.cacheManager != nil { + cacheManagerIface = h.cacheManager + } + task := hydrationfn.NewHydrationTaskWithManager( + userID, sourceID, appID, + appDataMap, h.cache, cacheManagerIface, h.settingsManager, + ) + + glog.V(3).Infof("HydrateSingleApp: processing %s %s (user=%s, source=%s)", appID, appName, userID, sourceID) + taskStartTime := time.Now() + + for _, step := range h.steps { + if step.CanSkip(ctx, task) { + task.IncrementStep() + continue + } + if err := step.Execute(ctx, task); err != nil { + failureReason := err.Error() + failureStep := step.GetStepName() + glog.Errorf("HydrateSingleApp: step %s failed for app %s %s: %v", failureStep, appID, appName, err) + h.moveTaskToRenderFailed(task, failureReason, failureStep) + duration := time.Since(taskStartTime) + h.markTaskFailed(task, taskStartTime, duration, failureStep, failureReason) + return false + } + task.IncrementStep() + } + + if !h.isAppHydrationComplete(pendingData) { + glog.Warningf("HydrateSingleApp: steps completed but data incomplete for app %s %s, will retry next cycle", appID, appName) + return false + } + + 
task.SetStatus(hydrationfn.TaskStatusCompleted) + duration := time.Since(taskStartTime) + h.markTaskCompleted(task, taskStartTime, duration) + glog.V(2).Infof("HydrateSingleApp: completed for app %s %s in %v", appID, appName, duration) + return true +} diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index 8352b56..4b24853 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -103,8 +103,14 @@ func (s *Syncer) GetSteps() []syncerfn.SyncStep { return steps } -// Start begins the synchronization process +// Start begins the synchronization process with its own sync loop func (s *Syncer) Start(ctx context.Context) error { + return s.StartWithOptions(ctx, true) +} + +// StartWithOptions starts the syncer with options. +// If enableSyncLoop is false, the periodic sync loop is not started (Pipeline handles scheduling). +func (s *Syncer) StartWithOptions(ctx context.Context, enableSyncLoop bool) error { if !s.mutex.TryLock() { return fmt.Errorf("failed to acquire lock for Start") } @@ -115,12 +121,25 @@ func (s *Syncer) Start(ctx context.Context) error { s.isRunning.Store(true) s.mutex.Unlock() - glog.V(2).Infof("Starting syncer with %d steps, sync interval: %v", len(s.steps), s.syncInterval) - - go s.syncLoop(ctx) + if enableSyncLoop { + glog.V(2).Infof("Starting syncer with %d steps, sync interval: %v", len(s.steps), s.syncInterval) + go s.syncLoop(ctx) + } else { + glog.V(2).Infof("Starting syncer with %d steps (passive mode, Pipeline handles scheduling)", len(s.steps)) + } return nil } +// SyncOnce executes one sync cycle, called by Pipeline +func (s *Syncer) SyncOnce(ctx context.Context) { + if !s.isRunning.Load() { + return + } + if err := s.executeSyncCycle(ctx); err != nil { + glog.Errorf("SyncOnce: sync cycle failed: %v", err) + } +} + // Stop stops the synchronization process func (s *Syncer) Stop() { if !s.mutex.TryLock() { From 3dd0fe99995db00811a32c648f2365b8c0d78113 Mon Sep 17 00:00:00 2001 From: Cursor 
Agent Date: Sat, 28 Feb 2026 13:11:35 +0000 Subject: [PATCH 03/45] refactor: replace TryLock with blocking Lock on pipeline execution path Pipeline-internal operations must complete, not skip. Change ~15 TryLock/TryRLock calls to blocking Lock/RLock in code that is only called from the Pipeline goroutine: - hydrationfn/task_for_api.go: writeAppDataToCache TryLock -> Lock - datawatcher_app.go: ProcessSingleAppToLatest TryLock -> Lock - datawatcher_app.go: calculateAndSetUserHashDirect - simplify goroutine/channel/timeout pattern to direct Lock (60+ lines removed) - status_correction_check.go: hash update TryLock -> Lock - hydration.go: isAppInRenderFailedList, isAppInLatestQueue TryRLock -> RLock - hydration.go: moveTaskToRenderFailed TryRLock -> RLock - hydration.go: removeFromPendingList - simplify two-phase TryRLock+TryLock to single Lock - syncer.go: storeDataForSource, storeDataDirectly, storeDataViaCacheManager TryLock/TryRLock -> Lock/RLock cache.go internal methods keep TryLock (shared with DataWatcherState NATS + API handlers). 
Co-authored-by: aby913 --- internal/v2/appinfo/datawatcher_app.go | 94 +------------------ internal/v2/appinfo/hydration.go | 38 ++------ .../v2/appinfo/hydrationfn/task_for_api.go | 5 +- .../v2/appinfo/status_correction_check.go | 7 +- internal/v2/appinfo/syncer.go | 26 +---- 5 files changed, 17 insertions(+), 153 deletions(-) diff --git a/internal/v2/appinfo/datawatcher_app.go b/internal/v2/appinfo/datawatcher_app.go index 3d12bbf..13d38bb 100644 --- a/internal/v2/appinfo/datawatcher_app.go +++ b/internal/v2/appinfo/datawatcher_app.go @@ -397,94 +397,14 @@ func (dw *DataWatcher) calculateAndSetUserHashDirect(userID string, userData *ty glog.V(2).Infof("DataWatcher: Hash changed for user %s: %s -> %s", userID, currentHash, newHash) - // Use a single write lock acquisition with timeout to avoid deadlock - writeTimeout := 5 * time.Second - writeLockAcquired := make(chan bool, 1) - writeLockError := make(chan error, 1) - cancel := make(chan bool, 1) - - go func() { - defer func() { - if r := recover(); r != nil { - glog.Errorf("DataWatcher: Panic during write lock acquisition for user %s: %v", userID, r) - writeLockError <- fmt.Errorf("panic during write lock acquisition: %v", r) - } - }() - - glog.V(3).Infof("DataWatcher: Attempting to acquire write lock for user %s", userID) - glog.V(3).Infof("[LOCK] dw.cacheManager.mutex.TryLock() @439 Start") - if !dw.cacheManager.mutex.TryLock() { - glog.Warningf("DataWatcher: Write lock not available for user %s, skipping hash update", userID) - writeLockError <- fmt.Errorf("write lock not available") - return - } - defer func() { - dw.cacheManager.mutex.Unlock() - glog.V(3).Infof("[LOCK] dw.cacheManager.mutex.Unlock() @453 Start") - glog.V(3).Infof("DataWatcher: Write lock released for user %s", userID) - }() - - // Check if cancelled before sending signal - select { - case <-cancel: - glog.V(3).Infof("DataWatcher: Write lock acquisition cancelled for user %s", userID) - return - default: - } - - glog.V(3).Infof("DataWatcher: 
Write lock acquired for user %s", userID) - glog.V(3).Infof("[LOCK] dw.cacheManager.mutex.Lock() @439 Success") - - // Send signal and wait for processing - select { - case writeLockAcquired <- true: - // Successfully sent signal, wait for cancellation or completion - <-cancel - case <-cancel: - glog.V(3).Infof("DataWatcher: Write lock acquisition cancelled before signal for user %s", userID) - } - }() - - select { - case <-writeLockAcquired: - // Write lock acquired successfully - glog.V(3).Infof("DataWatcher: Write lock acquired for hash update, user %s", userID) - - // Update hash and release lock immediately - originalUserData.Hash = newHash - glog.V(3).Infof("DataWatcher: Hash updated in memory for user %s", userID) - - // Cancel the goroutine to release the lock - close(cancel) - - case err := <-writeLockError: - glog.Errorf("DataWatcher: Error acquiring write lock for user %s: %v", userID, err) - close(cancel) - return false - - case <-time.After(writeTimeout): - glog.Errorf("DataWatcher: Timeout acquiring write lock for hash update, user %s", userID) - close(cancel) - return false - } + dw.cacheManager.mutex.Lock() + originalUserData.Hash = newHash + dw.cacheManager.mutex.Unlock() glog.V(3).Infof("DataWatcher: Hash updated for user %s", userID) - // Verification: Check if the hash was actually updated - if glog.V(2) { - verifyUserData := dw.cacheManager.GetUserData(userID) - if verifyUserData != nil { - verifyHash := verifyUserData.Hash - glog.V(2).Infof("DataWatcher: Verification - hash = '%s' for user %s", verifyHash, userID) - } else { - glog.Errorf("DataWatcher: Verification failed - CacheManager.GetUserData returned nil for user %s", userID) - } - } - - // Trigger force sync to persist the hash change - glog.V(3).Infof("DataWatcher: Starting force sync for user %s", userID) if err := dw.cacheManager.ForceSync(); err != nil { - glog.V(4).Infof("DataWatcher: Failed to force sync after hash update for user %s: %v", userID, err) + glog.Errorf("DataWatcher: 
Failed to force sync after hash update for user %s: %v", userID, err) return false } else { glog.V(2).Infof("DataWatcher: Force sync completed after hash update for user %s", userID) @@ -1277,11 +1197,7 @@ func (dw *DataWatcher) ProcessSingleAppToLatest(userID, sourceID string, pending appName := dw.getAppName(pendingApp) glog.V(2).Infof("Pipeline: ProcessSingleAppToLatest user=%s, source=%s, id=%s, name=%s", userID, sourceID, appID, appName) - // Acquire write lock to move data - if !dw.cacheManager.mutex.TryLock() { - glog.Warningf("[TryLock] ProcessSingleAppToLatest: Write lock not available for user=%s, source=%s, skipping", userID, sourceID) - return false - } + dw.cacheManager.mutex.Lock() defer dw.cacheManager.mutex.Unlock() userData, userExists := dw.cacheManager.cache.Users[userID] diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index 7ebc528..6134860 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -1076,13 +1076,9 @@ func (h *Hydrator) moveTaskToRenderFailed(task *hydrationfn.HydrationTask, failu return } - // Find the pending data for this task var pendingData *types.AppInfoLatestPendingData if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Hydrator.moveTaskToRenderFailed: CacheManager read lock not available for user %s, skipping, source: %s, id: %s, name: %s, version: %s", task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) - return - } + h.cacheManager.mutex.RLock() userData, userExists := h.cache.Users[task.UserID] if !userExists { h.cacheManager.mutex.RUnlock() @@ -1141,19 +1137,15 @@ func (h *Hydrator) removeFromPendingList(userID, sourceID, appID, appName, appVe return } - // 1) Read-lock phase: locate index to remove (no writes under RLock) - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Hydrator.removeFromPendingList: CacheManager read lock not available for user %s, source %s, app %s %s %s, 
skipping", userID, sourceID, appID, appName, appVersion) - return - } + h.cacheManager.mutex.Lock() + defer h.cacheManager.mutex.Unlock() + userData, userExists := h.cache.Users[userID] if !userExists { - h.cacheManager.mutex.RUnlock() return } sourceData, sourceExists := userData.Sources[sourceID] if !sourceExists { - h.cacheManager.mutex.RUnlock() return } removeIdx := -1 @@ -1164,20 +1156,10 @@ func (h *Hydrator) removeFromPendingList(userID, sourceID, appID, appName, appVe break } } - h.cacheManager.mutex.RUnlock() - if removeIdx == -1 { return } - // 2) Try to acquire short write-lock and apply removal with new slice; skip if contended - // Use TryLock to avoid blocking - if !h.cacheManager.mutex.TryLock() { - glog.Warningf("[TryLock] DEBUG: removeFromPendingList skipped (lock not available) for user=%s source=%s app=%s %s %s", userID, sourceID, appID, appName, appVersion) - return - } - defer h.cacheManager.mutex.Unlock() - // Re-validate pointers under write-lock if userData2, ok := h.cache.Users[userID]; ok { if sourceData2, ok2 := userData2.Sources[sourceID]; ok2 { @@ -1825,12 +1807,8 @@ func (h *Hydrator) cleanupOldTasks() { func (h *Hydrator) isAppInLatestQueue(userID, sourceID, appID, appName, version string) bool { glog.V(3).Infof("DEBUG: isAppInLatestQueue checking appID=%s %s, version=%s for user=%s, source=%s", appID, appName, version, userID, sourceID) - // Use CacheManager's lock if available if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Hydrator.isAppInLatestQueue: CacheManager read lock not available for user: %s, source: %s, app: %s %s %s, returning false", userID, sourceID, appID, appName, version) - return false - } + h.cacheManager.mutex.RLock() defer h.cacheManager.mutex.RUnlock() userData, userExists := h.cache.Users[userID] @@ -2053,12 +2031,8 @@ func (h *Hydrator) convertLatestDataToMap(latestData *types.AppInfoLatestData) m // isAppInRenderFailedList checks if an app already exists in the 
render failed list func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName string) bool { - // Use CacheManager's lock if available if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Hydrator.isAppInRenderFailedList: CacheManager read lock not available for user %s, source %s, app %s %s, returning false", userID, sourceID, appID, appName) - return false - } + h.cacheManager.mutex.RLock() defer h.cacheManager.mutex.RUnlock() userData, userExists := h.cache.Users[userID] diff --git a/internal/v2/appinfo/hydrationfn/task_for_api.go b/internal/v2/appinfo/hydrationfn/task_for_api.go index bfbbe86..fd7fe76 100644 --- a/internal/v2/appinfo/hydrationfn/task_for_api.go +++ b/internal/v2/appinfo/hydrationfn/task_for_api.go @@ -164,11 +164,8 @@ func (s *TaskForApiStep) writeAppDataToCache(task *HydrationTask, appData interf return fmt.Errorf("app_data is not in expected format, app=%s, appName=%s", task.AppID, task.AppName) } - // Now acquire the lock for cache operations if task.CacheManager != nil { - if !task.CacheManager.TryLock() { - return fmt.Errorf("write lock not available for cache update, user=%s, source=%s, app=%s, appName=%s", task.UserID, task.SourceID, task.AppID, task.AppName) - } + task.CacheManager.Lock() defer task.CacheManager.Unlock() } diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index 53e152a..7eb1844 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -274,12 +274,7 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.Errorf("StatusCorrectionChecker: failed to calculate hash for user %s: %v", userID, err) continue } - // Write back hash with lock - glog.V(3).Infof("[LOCK] scc.cacheManager.mutex.TryLock() @status_correction:updateHash Start") - if !scc.cacheManager.mutex.TryLock() { - glog.Warning("[TryLock] StatusCorrectionChecker: CacheManager write 
lock not available for hash update, skipping") - continue - } + scc.cacheManager.mutex.Lock() userData.Hash = newHash scc.cacheManager.mutex.Unlock() glog.V(2).Infof("StatusCorrectionChecker: user %s hash updated to %s", userID, newHash) diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index 4b24853..ea92ddb 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -487,25 +487,15 @@ func (s *Syncer) executeSyncCycleWithSource(ctx context.Context, source *setting // Get all existing user IDs with minimal locking var userIDs []string - // Use CacheManager if available, otherwise use direct cache access if cacheManager := s.cacheManager.Load(); cacheManager != nil { - // Use CacheManager's lock - if !cacheManager.mutex.TryRLock() { - glog.Warning("[TryRLock] Syncer: CacheManager read lock not available, skipping user ID collection") - return fmt.Errorf("read lock not available") - } + cacheManager.mutex.RLock() for userID := range s.cache.Users { userIDs = append(userIDs, userID) } cacheManager.mutex.RUnlock() - // If no users exist, create a system user as fallback if len(userIDs) == 0 { - glog.V(3).Infof("[LOCK] cacheManager.mutex.TryLock() @syncer:createSystemUser Start") - if !cacheManager.mutex.TryLock() { - glog.Warning("[TryLock] Syncer: CacheManager write lock not available for system user creation, skipping") - return fmt.Errorf("write lock not available") - } + cacheManager.mutex.Lock() // Double-check after acquiring write lock if len(s.cache.Users) == 0 { systemUserID := "system" @@ -550,13 +540,8 @@ func (s *Syncer) executeSyncCycleWithSource(ctx context.Context, source *setting // storeDataDirectly stores data directly to cache without going through CacheManager func (s *Syncer) storeDataDirectly(userID, sourceID string, completeData map[string]interface{}) { - // Use CacheManager's lock if available if cacheManager := s.cacheManager.Load(); cacheManager != nil { - glog.V(3).Infof("[LOCK] 
cacheManager.mutex.TryLock() @syncer:storeDataDirectly Start") - if !cacheManager.mutex.TryLock() { - glog.Warning("[TryLock] Syncer: CacheManager write lock not available for data storage, skipping") - return - } + cacheManager.mutex.Lock() defer cacheManager.mutex.Unlock() } else { // Fallback: no lock protection (not recommended) @@ -829,10 +814,7 @@ func (s *Syncer) storeDataViaCacheManager(userIDs []string, sourceID string, com for _, userID := range userIDs { // Check if the source is local type - skip syncer operations for local sources if cacheManager := s.cacheManager.Load(); cacheManager != nil { - if !cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Syncer.storeDataViaCacheManager: CacheManager read lock not available for user %s, source %s, skipping", userID, sourceID) - continue - } + cacheManager.mutex.RLock() userData, userExists := s.cache.Users[userID] if userExists { sourceData, sourceExists := userData.Sources[sourceID] From 6371316f89ee3ecb4c4179071f68c3e0cb42d209 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 28 Feb 2026 13:22:19 +0000 Subject: [PATCH 04/45] refactor: replace remaining TryLock with Lock on pipeline execution path - hydration.go: markTaskCompleted/markTaskFailed taskMutex.TryLock -> Lock - hydration.go: addToCompletedHistory/addToFailedHistory workerStatusMutex.TryLock -> Lock - syncerfn/detail_fetch_step.go: TryRLock/TryLock -> RLock/Lock (called from Syncer steps in pipeline) Co-authored-by: aby913 --- internal/v2/appinfo/hydration.go | 21 ++++--------------- .../v2/appinfo/syncerfn/detail_fetch_step.go | 21 +++++-------------- 2 files changed, 9 insertions(+), 33 deletions(-) diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index 6134860..9dae92b 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -921,10 +921,7 @@ func (h *Hydrator) markTaskCompleted(task *hydrationfn.HydrationTask, startedAt sourceChartPath = path } - if 
!h.taskMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for markTaskCompleted, skipping status update, task: %s, user: %s, source: %s, id: %s, name: %s, version: %s", task.ID, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) - return - } + h.taskMutex.Lock() delete(h.activeTasks, task.ID) // Clean up in-memory data under lock @@ -966,11 +963,7 @@ func (h *Hydrator) markTaskFailed(task *hydrationfn.HydrationTask, startedAt tim sourceChartPath = path } - if !h.taskMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for markTaskFailed, skipping status update, task: %s, user: %s, source: %s, id: %s, name: %s, version: %s, error: %s", task.ID, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion, errorMsg) - return - } - + h.taskMutex.Lock() task.SetStatus(hydrationfn.TaskStatusFailed) delete(h.activeTasks, task.ID) @@ -1015,10 +1008,7 @@ func (h *Hydrator) markTaskFailed(task *hydrationfn.HydrationTask, startedAt tim // addToCompletedHistory adds a task to the completed tasks history func (h *Hydrator) addToCompletedHistory(task *hydrationfn.HydrationTask, startedAt time.Time, duration time.Duration) { - if !h.workerStatusMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for addToCompletedHistory, skipping, task: %s, user: %s, source: %s, id: %s, name: %s, version: %s", task.ID, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) - return - } + h.workerStatusMutex.Lock() defer h.workerStatusMutex.Unlock() entry := &TaskHistoryEntry{ @@ -1042,10 +1032,7 @@ func (h *Hydrator) addToCompletedHistory(task *hydrationfn.HydrationTask, starte // addToFailedHistory adds a task to the failed tasks history func (h *Hydrator) addToFailedHistory(task *hydrationfn.HydrationTask, startedAt time.Time, duration time.Duration, failedStep string, errorMsg string) { - if !h.workerStatusMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for addToFailedHistory, 
skipping, task: %s, user: %s, source: %s, id: %s, name: %s, version: %s", task.ID, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) - return - } + h.workerStatusMutex.Lock() defer h.workerStatusMutex.Unlock() entry := &TaskHistoryEntry{ diff --git a/internal/v2/appinfo/syncerfn/detail_fetch_step.go b/internal/v2/appinfo/syncerfn/detail_fetch_step.go index e7962e4..45fe8ce 100644 --- a/internal/v2/appinfo/syncerfn/detail_fetch_step.go +++ b/internal/v2/appinfo/syncerfn/detail_fetch_step.go @@ -588,12 +588,8 @@ func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string return } - // Step 1: Use try read lock to find all data that needs to be removed - glog.V(2).Infof("Step 1: Attempting to acquire read lock to find data for removal") - if !data.CacheManager.TryRLock() { - glog.Warningf("[TryRLock] Warning: Read lock not available for app removal, skipping: %s %s", appID, appName) - return - } + glog.V(2).Infof("Step 1: Acquiring read lock to find data for removal") + data.CacheManager.RLock() // Collect all data that needs to be removed type RemovalData struct { @@ -673,11 +669,8 @@ func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string return } - glog.V(2).Info("Step 2: Attempting to acquire write lock to update data") - if !data.CacheManager.TryLock() { - glog.Warningf("[TryLock] Warning: Write lock not available for app removal, skipping: %s %s", appID, appName) - return - } + glog.V(2).Info("Step 2: Acquiring write lock to update data") + data.CacheManager.Lock() defer data.CacheManager.Unlock() // Collect sync requests to trigger after releasing the lock @@ -1006,11 +999,7 @@ func (d *DetailFetchStep) isAppInstalled(appName, sourceID string, data *SyncCon return false } - // English comment: use try read lock to safely inspect installation states - if !data.CacheManager.TryRLock() { - glog.Warningf("[TryRLock] Warning: Read lock not available for isAppInstalled check, returning false, source: 
%s, name: %s", sourceID, appName) - return false - } + data.CacheManager.RLock() defer data.CacheManager.RUnlock() for _, userData := range data.Cache.Users { From c5d66362e649d2ba55c8c5b468330cc38ef1e98a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 28 Feb 2026 13:41:23 +0000 Subject: [PATCH 05/45] refactor: replace all TryLock/TryRLock with Lock/RLock in cache.go All cache.go internal methods now use blocking locks instead of non-blocking TryLock. This is safe because: - Pipeline goroutine: holds lock briefly for in-memory operations (no HTTP calls) - DataWatcherState (NATS): holds lock briefly via SetAppData - API handlers: use RLock for reads, briefly wait during writes - No HTTP calls inside any locked section in cache.go Removed ~120 lines of TryLock failure handling, warning logs, and complex lock acquisition patterns (goroutine+channel+timeout for cleanup worker). Only the TryLock()/TryRLock() public methods on CacheManager remain as part of the CacheManagerInterface (kept for backward compatibility). 
Co-authored-by: aby913 --- internal/v2/appinfo/cache.go | 145 ++++++----------------------------- 1 file changed, 24 insertions(+), 121 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 0036302..75592d9 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -118,11 +118,7 @@ func (cm *CacheManager) GetUserDataNoLock(userID string) *UserData { // GetUserDataWithFallback retrieves user data with fallback mechanism // Uses TryRLock to avoid blocking - returns nil if lock is not available immediately func (cm *CacheManager) GetUserDataWithFallback(userID string) *UserData { - if !cm.mutex.TryRLock() { - // Lock not available immediately, return nil to avoid blocking - glog.Warningf("[TryRLock] GetUserDataWithFallback: Read lock not available for user %s, returning nil", userID) - return nil - } + cm.mutex.RLock() defer cm.mutex.RUnlock() if cm.cache == nil { @@ -135,11 +131,7 @@ func (cm *CacheManager) GetUserDataWithFallback(userID string) *UserData { // GetAllUsersDataWithFallback returns all users data with fallback mechanism // Uses TryRLock to avoid blocking - returns empty map if lock is not available immediately func (cm *CacheManager) GetAllUsersDataWithFallback() map[string]*UserData { - if !cm.mutex.TryRLock() { - // Lock not available immediately, return empty map to avoid blocking - glog.Warning("[TryRLock] GetAllUsersData: Read lock not available, returning empty map") - return make(map[string]*UserData) - } + cm.mutex.RLock() defer cm.mutex.RUnlock() if cm.cache == nil { @@ -355,11 +347,7 @@ func (cm *CacheManager) processSyncRequest(req SyncRequest) { // GetUserData retrieves user data from cache func (cm *CacheManager) GetUserData(userID string) *UserData { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @184 Start") - if !cm.mutex.TryRLock() { - glog.Warningf("[TryRLock] GetUserData: Read lock not available for user %s, returning nil", userID) - return nil - } + cm.mutex.RLock() defer 
cm.mutex.RUnlock() return cm.cache.Users[userID] @@ -372,11 +360,7 @@ func (cm *CacheManager) getUserData(userID string) *UserData { // GetSourceData retrieves source data from cache func (cm *CacheManager) GetSourceData(userID, sourceID string) *SourceData { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @197 Start") - if !cm.mutex.TryRLock() { - glog.Warningf("[TryRLock] GetSourceData: Read lock not available for user %s, source %s, returning nil", userID, sourceID) - return nil - } + cm.mutex.RLock() defer cm.mutex.RUnlock() if userData, exists := cm.cache.Users[userID]; exists { @@ -388,9 +372,7 @@ func (cm *CacheManager) GetSourceData(userID, sourceID string) *SourceData { // GetAppVersionFromState retrieves app version from AppStateLatest in the specified source // Returns version and found flag func (cm *CacheManager) GetAppVersionFromState(userID, sourceID, appName string) (version string, found bool) { - if !cm.mutex.TryRLock() { - return "", false - } + cm.mutex.RLock() defer cm.mutex.RUnlock() userData := cm.cache.Users[userID] @@ -554,13 +536,8 @@ func (cm *CacheManager) updateAppStateLatest(userID, sourceID string, sourceData } func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType AppDataType, data map[string]interface{}) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @269 Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] setAppDataInternal: Write lock not available for user %s, source %s, type %v, skipping", userID, sourceID, dataType) - return fmt.Errorf("write lock not available") - } + cm.mutex.Lock() cm.updateLockStats("lock") - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @269 Success") // Watchdog: warn if write lock is held >1s watchdogFired := make(chan struct{}, 1) timer := time.AfterFunc(1*time.Second, func() { @@ -1074,13 +1051,8 @@ func (cm *CacheManager) SetAppData(userID, sourceID string, dataType AppDataType } func (cm *CacheManager) setLocalAppDataInternal(userID, sourceID string, dataType 
AppDataType, data types.AppInfoLatestData) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @SetLocalAppData Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] setLocalAppDataInternal: Write lock not available for user %s, source %s, type %v, skipping", userID, sourceID, dataType) - return fmt.Errorf("write lock not available") - } + cm.mutex.Lock() cm.updateLockStats("lock") - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @SetLocalAppData Success") _wd := cm.startLockWatchdog("@SetLocalAppData") defer func() { @@ -1158,11 +1130,7 @@ func (cm *CacheManager) SetLocalAppData(userID, sourceID string, dataType AppDat // GetAppData retrieves app data from cache using single global lock func (cm *CacheManager) GetAppData(userID, sourceID string, dataType AppDataType) interface{} { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @543 Start") - if !cm.mutex.TryRLock() { - glog.Warningf("[TryRLock] GetAppData: Read lock not available for user %s, source %s, type %v, returning nil", userID, sourceID, dataType) - return nil - } + cm.mutex.RLock() defer cm.mutex.RUnlock() if userData, exists := cm.cache.Users[userID]; exists { @@ -1187,12 +1155,7 @@ func (cm *CacheManager) GetAppData(userID, sourceID string, dataType AppDataType // RemoveUserData removes user data from cache and Redis func (cm *CacheManager) removeUserDataInternal(userID string) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @568 Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] removeUserDataInternal: Write lock not available for user %s, skipping", userID) - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @568 Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@568:removeUser") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1221,12 +1184,7 @@ func (cm *CacheManager) RemoveUserData(userID string) error { // AddUser adds a new user to the cache func (cm *CacheManager) addUserInternal(userID string) error { - 
glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @AddUser Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] addUserInternal: Write lock not available for user %s, skipping", userID) - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @AddUser Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@AddUser") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1277,11 +1235,7 @@ func (cm *CacheManager) AddUser(userID string) error { // GetCacheStats returns cache statistics using single global lock func (cm *CacheManager) GetCacheStats() map[string]interface{} { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @586 Start") - if !cm.mutex.TryRLock() { - glog.Warning("[TryRLock] GetCacheStats: Read lock not available, returning empty stats") - return map[string]interface{}{"error": "lock not available"} - } + cm.mutex.RLock() defer cm.mutex.RUnlock() stats := make(map[string]interface{}) @@ -1329,11 +1283,7 @@ func (cm *CacheManager) ForceSync() error { // 1. 
Quickly obtain a data snapshot to minimize lock holding time var userDataSnapshot map[string]*UserData func() { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @617 Start") - if !cm.mutex.TryRLock() { - glog.Warning("[TryRLock] ForceSync: Read lock not available, returning error") - return - } + cm.mutex.RLock() defer func() { cm.mutex.RUnlock() glog.V(4).Infof("[LOCK] cm.mutex.RUnlock() @617 End") @@ -1409,10 +1359,7 @@ func (cm *CacheManager) GetForceSyncCooldown() time.Duration { // GetAllUsersData returns all users data from cache using single global lock func (cm *CacheManager) GetAllUsersData() map[string]*UserData { - if !cm.mutex.TryRLock() { - glog.Warning("[TryRLock] GetAllUsersData: Read lock not available, returning empty map") - return make(map[string]*UserData) - } + cm.mutex.RLock() defer cm.mutex.RUnlock() if cm.cache == nil { @@ -1435,11 +1382,7 @@ func (cm *CacheManager) GetAllUsersData() map[string]*UserData { // HasUserStateDataForSource checks if any user has non-empty state data for a specific source func (cm *CacheManager) HasUserStateDataForSource(sourceID string) bool { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @HasUserStateDataForSource Start") - if !cm.mutex.TryRLock() { - glog.Warningf("[TryRLock] HasUserStateDataForSource: Read lock not available for source %s, returning false", sourceID) - return false - } + cm.mutex.RLock() defer func() { cm.mutex.RUnlock() glog.V(4).Infof("[LOCK] cm.mutex.RUnlock() @HasUserStateDataForSource End") @@ -1466,12 +1409,7 @@ func (cm *CacheManager) HasUserStateDataForSource(sourceID string) bool { // UpdateUserConfig updates the user configuration and ensures all users have data structures func (cm *CacheManager) updateUserConfigInternal(newUserConfig *UserConfig) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @660 Start") - if !cm.mutex.TryLock() { - glog.Warning("[TryLock] updateUserConfigInternal: Write lock not available, skipping") - return fmt.Errorf("write lock not available") - } - 
glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @660 Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@660:updateUserConfig") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1541,12 +1479,7 @@ func (cm *CacheManager) UpdateUserConfig(newUserConfig *UserConfig) error { // SyncUserListToCache ensures all users from current userConfig have initialized data structures func (cm *CacheManager) syncUserListToCacheInternal() error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @718 Start") - if !cm.mutex.TryLock() { - glog.Warning("[TryLock] syncUserListToCacheInternal: Write lock not available, skipping") - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @718 Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@718:syncUserList") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1594,11 +1527,7 @@ func (cm *CacheManager) SyncUserListToCache() error { // CleanupInvalidPendingData removes invalid pending data entries that lack required identifiers func (cm *CacheManager) cleanupInvalidPendingDataInternal() int { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @751 Start") - if !cm.mutex.TryLock() { - glog.Warning("[TryLock] CleanupInvalidPendingData: Write lock not available, skipping cleanup") - return 0 - } + cm.mutex.Lock() _wd := cm.startLockWatchdog("@751:cleanupInvalidPending") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1884,11 +1813,7 @@ func (cm *CacheManager) updateLockStats(lockType string) { // RemoveAppStateData removes a specific app from AppStateLatest for a user and source func (cm *CacheManager) removeAppStateDataInternal(userID, sourceID, appName string) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @RemoveAppStateData Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] RemoveAppStateData: Write lock not available for user %s, source %s, app %s, skipping", userID, sourceID, appName) - return fmt.Errorf("write lock not available") - } + cm.mutex.Lock() _wd := 
cm.startLockWatchdog("@RemoveAppStateData") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1941,12 +1866,7 @@ func (cm *CacheManager) RemoveAppStateData(userID, sourceID, appName string) err // RemoveAppInfoLatestData removes a specific app from AppInfoLatest for a user and source func (cm *CacheManager) removeAppInfoLatestDataInternal(userID, sourceID, appName string) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @RemoveAppInfoLatestData Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] removeAppInfoLatestDataInternal: Write lock not available for user %s, source %s, app %s, skipping", userID, sourceID, appName) - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @RemoveAppInfoLatestData Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@RemoveAppInfoLatestData") defer func() { cm.mutex.Unlock(); _wd() }() @@ -2032,12 +1952,7 @@ func (cm *CacheManager) GetSettingsManager() *settings.SettingsManager { // SyncMarketSourcesToCache synchronizes market sources to all users in cache func (cm *CacheManager) syncMarketSourcesToCacheInternal(sources []*settings.MarketSource) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @SyncMarketSourcesToCache Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] syncMarketSourcesToCacheInternal: Write lock not available, skipping") - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @SyncMarketSourcesToCache Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@SyncMarketSourcesToCache") defer func() { cm.mutex.Unlock() @@ -2110,12 +2025,7 @@ func (cm *CacheManager) SyncMarketSourcesToCache(sources []*settings.MarketSourc } func (cm *CacheManager) resynceUserInternal() error { - glog.V(4).Info("[LOCK] cm.mutex.TryLock() @resynceUserInternal Start") - if !cm.mutex.TryLock() { - glog.Warning("[TryLock] resynceUserInternal: Write lock not available, skipping") - return fmt.Errorf("write lock not 
available") - } - glog.V(4).Info("[LOCK] cm.mutex.TryLock() @resynceUserInternal Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@resynceUserInternal") defer func() { cm.mutex.Unlock(); _wd() }() @@ -2182,10 +2092,7 @@ func (cm *CacheManager) ClearAppRenderFailedData() { counts := make(map[target]int) glog.V(3).Info("INFO: [Cleanup] Attempting to acquire read lock for scan phase") - if !cm.mutex.TryRLock() { - glog.Warning("[TryRLock] INFO: [Cleanup] Read lock not available for scan phase, skipping cleanup") - return - } + cm.mutex.RLock() scanLockAcquiredAt := time.Now() glog.V(3).Info("INFO: [Cleanup] Read lock acquired (scan). Hold minimal time") @@ -2232,9 +2139,8 @@ func (cm *CacheManager) ClearAppRenderFailedData() { go func() { done := make(chan struct{}, 1) go func() { - if cm.mutex.TryLock() { + cm.mutex.Lock() done <- struct{}{} - } }() select { case <-done: @@ -2318,10 +2224,7 @@ func (cm *CacheManager) ListUsers() { userList = append(userList, user) } - if flag := cm.TryLock(); !flag { - glog.Warning("[TryLock] watch user list lock failed") - return - } + cm.Lock() defer cm.Unlock() if len(cm.cache.Users) == 0 { From 329d52888faaaa7efbcae8291cdea0419630dce7 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 28 Feb 2026 13:53:19 +0000 Subject: [PATCH 06/45] refactor: remove dead code from hydration.go and fix remaining TryLock in appinfomodule.go Removed 20 dead methods (~830 lines) from hydration.go that were part of the old worker pool / pendingDataMonitor model, now replaced by Pipeline: - EnqueueTask, worker, processTask, updateWorkerStatus - logTaskDataBeforeStep, logTaskDataAfterStep, getMapKeys - pendingDataMonitor, checkForPendingData - createTasksFromPendingData, createTasksFromPendingDataMap - isAppDataHydrationComplete, hasActiveTaskForApp, trackTask - hasRequiredRawDataFields, looksLikeAppsMap - ForceAddTaskFromLatestData, convertLatestDataToMap, ForceCheckPendingData appinfomodule.go: replaced last 2 TryLock/TryRLock calls with 
Lock/RLock in correctCacheWithChartRepo and GetInvalidDataReport. Co-authored-by: aby913 --- internal/v2/appinfo/appinfomodule.go | 15 +- internal/v2/appinfo/hydration.go | 838 +-------------------------- 2 files changed, 3 insertions(+), 850 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index 420ddf9..1557f27 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -772,12 +772,7 @@ func (m *AppInfoModule) correctCacheWithChartRepo() error { return fmt.Errorf("cache manager not available") } - // Add detailed lock logs for diagnosis - glog.V(3).Infof("[LOCK] m.cacheManager.mutex.TryLock() @appinfomodule:cleanup Start") - if !m.cacheManager.mutex.TryLock() { - glog.Warning("[TryLock] AppInfoModule cleanup: CacheManager write lock not available, skipping cleanup") - return nil - } + m.cacheManager.mutex.Lock() defer m.cacheManager.mutex.Unlock() removedCount := 0 for userID, userData := range m.cacheManager.cache.Users { @@ -1377,13 +1372,7 @@ func (m *AppInfoModule) GetInvalidDataReport() map[string]interface{} { }, } - if !m.cacheManager.mutex.TryRLock() { - glog.Warning("[TryRLock] AppInfoModule: CacheManager read lock not available, skipping operation") - return map[string]interface{}{ - "error": "lock not available", - "status": "unknown", - } - } + m.cacheManager.mutex.RLock() defer m.cacheManager.mutex.RUnlock() totalUsers := 0 diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index 9dae92b..dbf1129 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -2,7 +2,6 @@ package appinfo import ( "context" - "encoding/json" "fmt" "os" "reflect" @@ -196,434 +195,17 @@ func (h *Hydrator) IsRunning() bool { return h.isRunning.Load() } -// EnqueueTask adds a task to the hydration queue -func (h *Hydrator) EnqueueTask(task *hydrationfn.HydrationTask) error { - if !h.IsRunning() { - return fmt.Errorf("hydrator is 
not running") - } - - select { - case h.taskQueue <- task: - h.trackTask(task) - glog.V(4).Infof("Enqueued hydration task: %s for app: %s (user: %s, source: %s) - Queue length: %d", - task.ID, task.AppID, task.UserID, task.SourceID, len(h.taskQueue)) - return nil - default: - glog.Errorf("ERROR: Task queue is full! Cannot enqueue task: %s for app: %s (user: %s, source: %s) - Queue length: %d", - task.ID, task.AppID, task.UserID, task.SourceID, len(h.taskQueue)) - return fmt.Errorf("task queue is full") - } -} - -// worker processes tasks from the queue -func (h *Hydrator) worker(ctx context.Context, workerID int) { - glog.V(3).Infof("Hydration worker %d started", workerID) - - // Initialize worker status - h.updateWorkerStatus(workerID, nil, true) - - defer func() { - // Mark worker as idle when stopping - h.updateWorkerStatus(workerID, nil, true) - glog.V(4).Infof("Hydration worker %d stopped", workerID) - }() - - for { - select { - case <-ctx.Done(): - return - case <-h.stopChan: - return - case task := <-h.taskQueue: - if task != nil { - glog.V(3).Infof("DEBUG: Worker %d received task from queue: %s for app: %s (user: %s, source: %s)", workerID, task.ID, task.AppID, task.UserID, task.SourceID) - h.processTask(ctx, task, workerID) - } - } - } -} - -// processTask processes a single hydration task -func (h *Hydrator) processTask(ctx context.Context, task *hydrationfn.HydrationTask, workerID int) { - // Add memory monitoring at the start of task processing - h.monitorMemoryUsage() - - glog.V(3).Info("==================== HYDRATION TASK STARTED ====================") - glog.V(3).Infof("Worker %d processing task: %s for app: %s", workerID, task.ID, task.AppID) - - // Update worker status to indicate it's processing this task - h.updateWorkerStatus(workerID, task, false) - taskStartTime := time.Now() - - // Ensure worker status is cleared when task completes or fails - defer h.updateWorkerStatus(workerID, nil, true) - - // Check if task is in cooldown period - if 
task.LastFailureTime != nil && time.Since(*task.LastFailureTime) < 5*time.Minute { - glog.V(4).Infof("Task %s is in cooldown period, skipping. Next retry available at: %v", - task.ID, task.LastFailureTime.Add(5*time.Minute)) - return - } - - task.SetStatus(hydrationfn.TaskStatusRunning) - - // Execute all steps - for i, step := range h.steps { - if task.CurrentStep > i { - continue // Skip already completed steps - } - - // Check if step can be skipped - if step.CanSkip(ctx, task) { - glog.V(3).Infof("Skipping step %d (%s) for task: %s", i+1, step.GetStepName(), task.ID) - glog.V(3).Infof("-------- HYDRATION STEP %d/%d SKIPPED: %s --------", i+1, len(h.steps), step.GetStepName()) - task.IncrementStep() - continue - } - - glog.V(3).Infof("-------- HYDRATION STEP %d/%d STARTED: %s --------", i+1, len(h.steps), step.GetStepName()) - glog.V(3).Infof("Executing step %d (%s) for task: %s", i+1, step.GetStepName(), task.ID) - - // Update worker status with current step - h.updateWorkerStatus(workerID, task, false) - - // Log task data before step execution - h.logTaskDataBeforeStep(task, i+1, step.GetStepName()) - - // Execute step - if err := step.Execute(ctx, task); err != nil { - glog.Errorf("Step %d (%s) failed for task: %s, app: %s %s %s, error: %v", i+1, step.GetStepName(), task.ID, task.AppID, task.AppName, task.AppVersion, err) - glog.Errorf("-------- HYDRATION STEP %d/%d FAILED: %s --------", i+1, len(h.steps), step.GetStepName()) - task.SetError(err) - - // Clean up resources before failure - h.cleanupTaskResources(task) - - // Set failure time - now := time.Now() - task.LastFailureTime = &now - - // Comment out retry logic - instead move to render failed list - /* - // Check if task can be retried - if task.CanRetry() { - glog.Infof("Task %s failed, will retry after cooldown period (5 minutes). 
Next retry available at: %v", - task.ID, task.LastFailureTime.Add(5*time.Minute)) - task.ResetForRetry() - - // Re-enqueue for retry after cooldown - go func() { - time.Sleep(5 * time.Minute) // Wait for cooldown period - if err := h.EnqueueTask(task); err != nil { - glog.Errorf("Failed to re-enqueue task for retry: %s, error: %v", task.ID, err) - h.markTaskFailed(task, time.Now(), 0, "retry", err.Error()) - } - }() - glog.V(3).Infof("==================== HYDRATION TASK QUEUED FOR RETRY AFTER COOLDOWN ====================") - return - } else { - // Max retries exceeded - glog.V(3).Infof("Task failed after max retries: %s", task.ID) - h.markTaskFailed(task, time.Now(), 0, "max_retries", "max retries exceeded") - glog.V(3).Infof("==================== HYDRATION TASK FAILED ====================") - return - } - */ - - // Move failed task to render failed list instead of retrying - failureReason := err.Error() - failureStep := step.GetStepName() - - glog.Errorf("Task %s failed at step %s, moving to render failed list with reason: %s", - task.ID, failureStep, failureReason) - - duration := time.Since(taskStartTime) - h.moveTaskToRenderFailed(task, failureReason, failureStep) - h.markTaskFailed(task, taskStartTime, duration, failureStep, failureReason) - - glog.Errorf("==================== HYDRATION TASK MOVED TO RENDER FAILED LIST ====================") - return - } - - // Log task data after step execution - h.logTaskDataAfterStep(task, i+1, step.GetStepName()) - - task.IncrementStep() - glog.V(3).Infof("Step %d (%s) completed for task: %s", i+1, step.GetStepName(), task.ID) - glog.V(4).Infof("-------- HYDRATION STEP %d/%d COMPLETED: %s --------", i+1, len(h.steps), step.GetStepName()) - } - - // All steps completed successfully - task.SetStatus(hydrationfn.TaskStatusCompleted) - duration := time.Since(taskStartTime) - h.markTaskCompleted(task, taskStartTime, duration) - - glog.V(3).Infof("Task completed successfully: %s for app: %s", task.ID, task.AppID) - 
glog.V(4).Infof("==================== HYDRATION TASK COMPLETED ====================") - glog.V(4).Infoln("") -} - -// updateWorkerStatus updates the status of a worker -func (h *Hydrator) updateWorkerStatus(workerID int, task *hydrationfn.HydrationTask, isIdle bool) { - if !h.workerStatusMutex.TryLock() { - return // Skip if can't acquire lock - } - defer h.workerStatusMutex.Unlock() - - if isIdle { - delete(h.workerStatus, workerID) - return - } - - // Worker is processing a task - var taskInfo *TaskInfo - if task != nil { - taskInfo = h.taskToTaskInfo(task) - } - - h.workerStatus[workerID] = &WorkerStatus{ - WorkerID: workerID, - IsIdle: false, - CurrentTask: taskInfo, - LastActivity: time.Now(), - } -} - -// logTaskDataBeforeStep logs task data before step execution to help debug JSON cycle issues -func (h *Hydrator) logTaskDataBeforeStep(task *hydrationfn.HydrationTask, stepNum int, stepName string) { - glog.V(3).Infof("DEBUG: Before step %d (%s) - Task data structure check", stepNum, stepName) - - // Try to JSON marshal task.ChartData - if len(task.ChartData) > 0 { - if jsonData, err := json.Marshal(task.ChartData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.ChartData before step %d: %v, ChartData keys: %v", stepNum, err, h.getMapKeys(task.ChartData)) - } else { - glog.V(3).Infof("DEBUG: task.ChartData JSON length before step %d: %d bytes", stepNum, len(jsonData)) - } - } - - // Try to JSON marshal task.AppData - if len(task.AppData) > 0 { - if jsonData, err := json.Marshal(task.AppData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.AppData before step %d: %v, AppData keys: %v", stepNum, err, h.getMapKeys(task.AppData)) - } else { - glog.V(3).Infof("DEBUG: task.AppData JSON length before step %d: %d bytes", stepNum, len(jsonData)) - } - } - - // Try to JSON marshal task.DatabaseUpdateData - if len(task.DatabaseUpdateData) > 0 { - if jsonData, err := json.Marshal(task.DatabaseUpdateData); err != nil { - 
glog.Errorf("ERROR: JSON marshal failed for task.DatabaseUpdateData before step %d: %v, DatabaseUpdateData keys: %v", stepNum, err, h.getMapKeys(task.DatabaseUpdateData)) - } else { - glog.V(3).Infof("DEBUG: task.DatabaseUpdateData JSON length before step %d: %d bytes", stepNum, len(jsonData)) - } - } -} - -// logTaskDataAfterStep logs task data after step execution to help debug JSON cycle issues -func (h *Hydrator) logTaskDataAfterStep(task *hydrationfn.HydrationTask, stepNum int, stepName string) { - glog.V(3).Infof("DEBUG: After step %d (%s) - Task data structure check", stepNum, stepName) - - // Try to JSON marshal task.ChartData - if len(task.ChartData) > 0 { - if jsonData, err := json.Marshal(task.ChartData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.ChartData after step %d: %v, ChartData keys: %v", stepNum, err, h.getMapKeys(task.ChartData)) - } else { - glog.V(3).Infof("DEBUG: task.ChartData JSON length after step %d: %d bytes", stepNum, len(jsonData)) - } - } - - // Try to JSON marshal task.AppData - if len(task.AppData) > 0 { - if jsonData, err := json.Marshal(task.AppData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.AppData after step %d: %v, AppData keys: %v", stepNum, err, h.getMapKeys(task.AppData)) - } else { - glog.V(3).Infof("DEBUG: task.AppData JSON length after step %d: %d bytes", stepNum, len(jsonData)) - } - } - - // Try to JSON marshal task.DatabaseUpdateData - if len(task.DatabaseUpdateData) > 0 { - if jsonData, err := json.Marshal(task.DatabaseUpdateData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.DatabaseUpdateData after step %d: %v, DatabaseUpdateData keys: %v", stepNum, err, h.getMapKeys(task.DatabaseUpdateData)) - } else { - glog.V(3).Infof("DEBUG: task.DatabaseUpdateData JSON length after step %d: %d bytes", stepNum, len(jsonData)) - } - } -} - -// getMapKeys safely extracts keys from a map for debugging -func (h *Hydrator) getMapKeys(data map[string]interface{}) []string 
{ - keys := make([]string, 0, len(data)) - for key := range data { - keys = append(keys, key) - } - return keys -} - -// cleanupTaskResources cleans up resources associated with a task func (h *Hydrator) cleanupTaskResources(task *hydrationfn.HydrationTask) { - // Clean up chart data - // if renderedDir, exists := task.ChartData["rendered_chart_dir"].(string); exists { - // if err := os.RemoveAll(renderedDir); err != nil { - // glog.Info("Warning: Failed to clean up rendered chart directory %s: %v", renderedDir, err) - // } - // } - - // Clean up source chart if sourceChartPath, exists := task.ChartData["source_chart_path"].(string); exists { if err := os.Remove(sourceChartPath); err != nil { glog.Errorf("Warning: Failed to clean up source chart file %s: %v", sourceChartPath, err) } } - - // Clear task data maps task.ChartData = make(map[string]interface{}) task.DatabaseUpdateData = make(map[string]interface{}) - - // Clear app data to reduce memory usage task.AppData = make(map[string]interface{}) } -// pendingDataMonitor monitors for new pending data and creates tasks -func (h *Hydrator) pendingDataMonitor(ctx context.Context) { - glog.V(3).Infoln("Pending data monitor started") - defer glog.V(3).Infoln("Pending data monitor stopped") - - ticker := time.NewTicker(time.Second * 30) // Check every 30 seconds - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-h.stopChan: - return - case <-ticker.C: - h.checkForPendingData() - } - } -} - -// checkForPendingData scans cache for pending data and creates hydration tasks -func (h *Hydrator) checkForPendingData() { - // Use CacheManager's lock if available - if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warning("[TryRLock] Hydrator.checkForPendingData: CacheManager read lock not available, skipping") - return - } - defer h.cacheManager.mutex.RUnlock() - - for userID, userData := range h.cache.Users { - // No nested locks needed since we already hold the global 
lock - for sourceID, sourceData := range userData.Sources { - // No nested locks needed since we already hold the global lock - - // Log source type for debugging - both local and remote should be processed - if len(sourceData.AppInfoLatestPending) > 0 { - glog.V(3).Infof("Checking pending data for user: %s, source: %s, type: %s, pending: %d", userID, sourceID, sourceData.Type, len(sourceData.AppInfoLatestPending)) - } - - // Check if there's pending data - process both local and remote sources - if len(sourceData.AppInfoLatestPending) > 0 { - glog.V(3).Infof("Found %d pending apps for user: %s, source: %s, type: %s", - len(sourceData.AppInfoLatestPending), userID, sourceID, sourceData.Type) - glog.V(3).Infof("DEBUG: About to process %d pending apps for user: %s, source: %s", len(sourceData.AppInfoLatestPending), userID, sourceID) - for i, pendingData := range sourceData.AppInfoLatestPending { - glog.V(3).Infof("DEBUG: Processing pending data %d/%d for user: %s, source: %s, pendingData: %v", i+1, len(sourceData.AppInfoLatestPending), userID, sourceID, pendingData != nil) - h.createTasksFromPendingData(userID, sourceID, pendingData) - } - } - } - } - } else { - glog.V(3).Infof("Warning: CacheManager not available for checkForPendingData") - } -} - -// createTasksFromPendingData creates hydration tasks from pending app data -func (h *Hydrator) createTasksFromPendingData(userID, sourceID string, pendingData *types.AppInfoLatestPendingData) { - if pendingData == nil { - glog.V(3).Infof("DEBUG: createTasksFromPendingData called with nil pendingData for user: %s, source: %s", userID, sourceID) - return - } - - glog.V(3).Infof("DEBUG: createTasksFromPendingData called for user: %s, source: %s, pendingData.RawData: %v", userID, sourceID, pendingData.RawData != nil) - - // For the new structure, we can work with RawData if it exists - if pendingData.RawData != nil { - // Handle regular structured RawData - appName := pendingData.RawData.Name - appID := 
pendingData.RawData.AppID - if appID == "" { - appID = pendingData.RawData.ID - } - - glog.V(3).Infof("DEBUG: Processing appID: %s %s for user: %s, source: %s", appID, appName, userID, sourceID) - - if appID != "" { - // Check if app is already in render failed list - if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - glog.V(3).Infof("App %s (user: %s, source: %s) is already in render failed list, skipping task creation", - appID, userID, sourceID) - return - } - - // Check if app hydration is already complete before creating new task - if h.isAppHydrationComplete(pendingData) { - glog.V(3).Infof("DEBUG: App hydration already complete for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - return - } - - // Check if app already exists in latest queue before creating new task - // Extract version from pending data for version comparison - version := "" - if pendingData.RawData != nil { - version = pendingData.RawData.Version - } - if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { - glog.V(3).Infof("DEBUG: App already exists in latest queue for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - return - } - - if !h.hasActiveTaskForApp(userID, sourceID, appID, appName) { - glog.V(3).Infof("DEBUG: No active task found for app: %s (user: %s, source: %s), proceeding with task creation", appID, userID, sourceID) - // Convert ApplicationInfoEntry to map for task creation - appDataMap := h.convertApplicationInfoEntryToMap(pendingData.RawData) - - if len(appDataMap) == 0 { - glog.V(3).Infof("Warning: Empty app data for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - return - } - - // Create task with CacheManager for unified lock strategy - var cacheManager types.CacheManagerInterface - if h.cacheManager != nil { - cacheManager = h.cacheManager - } - task := hydrationfn.NewHydrationTaskWithManager( - userID, sourceID, appID, - 
appDataMap, h.cache, cacheManager, h.settingsManager, - ) - - if err := h.EnqueueTask(task); err != nil { - glog.Errorf("Failed to enqueue task for app: %s (user: %s, source: %s), error: %v", - appID, userID, sourceID, err) - } else { - glog.V(3).Infof("Created hydration task for structured app: %s (user: %s, source: %s)", - appID, userID, sourceID) - } - } - } - return - } -} - // isAppHydrationComplete checks if an app has completed all hydration steps func (h *Hydrator) isAppHydrationComplete(pendingData *types.AppInfoLatestPendingData) bool { @@ -683,43 +265,6 @@ func (h *Hydrator) isAppHydrationComplete(pendingData *types.AppInfoLatestPendin glog.V(2).Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis incomplete for appID=%s, name=%s, TotalImages: %d, Images: %v", appID, appName, imageAnalysis.TotalImages, imageAnalysis.Images) return false } - -// isAppDataHydrationComplete checks if an app's hydration is complete by looking up pending data in cache -func (h *Hydrator) isAppDataHydrationComplete(userID, sourceID, appID string) bool { - // Use CacheManager's lock if available - if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRlock] Hydrator.isAppDataHydrationComplete: CacheManager read lock not available for user %s, source %s, app %s, returning false", userID, sourceID, appID) - return false - } - defer h.cacheManager.mutex.RUnlock() - - userData, userExists := h.cache.Users[userID] - if !userExists { - return false - } - - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - return false - } - - // Find the pending data for the specific app - for _, pendingData := range sourceData.AppInfoLatestPending { - if pendingData.RawData != nil && - (pendingData.RawData.ID == appID || pendingData.RawData.AppID == appID || pendingData.RawData.Name == appID) { - // Found the pending data for this app, check if hydration is complete - return h.isAppHydrationComplete(pendingData) - } - } - 
} else { - glog.V(3).Infof("Warning: CacheManager not available for isAppDataHydrationComplete") - } - - // If no pending data found for this app, consider it not hydrated - return false -} - // convertApplicationInfoEntryToMap converts ApplicationInfoEntry to map for task creation func (h *Hydrator) convertApplicationInfoEntryToMap(entry *types.ApplicationInfoEntry) map[string]interface{} { if entry == nil { @@ -880,39 +425,6 @@ func (h *Hydrator) deepCopyValue(value interface{}, visited map[uintptr]bool) in return nil } } - -// hasActiveTaskForApp checks if there's already an active task for the given app -func (h *Hydrator) hasActiveTaskForApp(userID, sourceID, appID, appName string) bool { - if !h.taskMutex.TryRLock() { - glog.Warningf("[TryRLock] Failed to acquire read lock for hasActiveTaskForApp, returning false, user: %s, source: %s, id: %s, name: %s", userID, sourceID, appID, appName) - return false - } - defer h.taskMutex.RUnlock() - - if len(h.activeTasks) > 0 { - glog.V(4).Infof("DEBUG: Checking active tasks for app: %s (user: %s, source: %s), total active tasks: %d", appID, userID, sourceID, len(h.activeTasks)) - } - - for _, task := range h.activeTasks { - if task.UserID == userID && task.SourceID == sourceID && task.AppID == appID { - glog.V(4).Infof("DEBUG: Found active task for app: %s (user: %s, source: %s), taskID: %s", appID, userID, sourceID, task.ID) - return true - } - } - glog.V(4).Infof("DEBUG: No active task found for app: %s (user: %s, source: %s)", appID, userID, sourceID) - return false -} - -// trackTask adds task to active tasks tracking -func (h *Hydrator) trackTask(task *hydrationfn.HydrationTask) { - if !h.taskMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for trackTask, skipping task tracking, task: %s, name: %s, version: %s", task.ID, task.AppName, task.AppVersion) - return - } - defer h.taskMutex.Unlock() - h.activeTasks[task.ID] = task -} - // markTaskCompleted moves task from active to completed func (h 
*Hydrator) markTaskCompleted(task *hydrationfn.HydrationTask, startedAt time.Time, duration time.Duration) { // Extract file path for cleanup before the lock @@ -1334,193 +846,6 @@ func CreateDefaultHydrator(cache *types.CacheData, settingsManager *settings.Set func (h *Hydrator) NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) { glog.V(3).Infof("Hydrator.NotifyPendingDataUpdate: Pipeline handles notifications, user=%s, source=%s", userID, sourceID) } - -// createTasksFromPendingDataMap creates hydration tasks from pending data map -func (h *Hydrator) createTasksFromPendingDataMap(userID, sourceID string, pendingData map[string]interface{}) { - glog.V(3).Infof("Creating tasks from pending data for user: %s, source: %s", userID, sourceID) - - // Extract data section from pendingData - dataSection, ok := pendingData["data"] - if !ok { - glog.V(3).Infof("No data section found in pending data for user: %s, source: %s", userID, sourceID) - return - } - - // Handle different data section formats - var appsMap map[string]interface{} - - // First, try to handle the case where dataSection is an AppStoreDataSection struct - glog.V(2).Infof("Data section type: %T for user: %s, source: %s", dataSection, userID, sourceID) - - // Check if it's an AppStoreDataSection by checking if it has Apps field - if dataStruct := dataSection; dataStruct != nil { - // Use reflection or type assertion to access the Apps field - - // Try to access as map first (for backwards compatibility) - if dataMap, ok := dataSection.(map[string]interface{}); ok { - // Check if it's in the expected format with "apps" key - if apps, hasApps := dataMap["apps"]; hasApps { - if appsMapValue, ok := apps.(map[string]interface{}); ok { - appsMap = appsMapValue - glog.V(3).Infof("Found apps data in standard map format for user: %s, source: %s", userID, sourceID) - } - } else { - // Check if the dataMap itself contains app entries - if h.looksLikeAppsMap(dataMap) { - appsMap = 
dataMap - glog.V(3).Infof("Data section appears to contain apps directly for user: %s, source: %s", userID, sourceID) - } - } - } else { - // Try to handle AppStoreDataSection struct using interface conversion - glog.V(2).Infof("Unsupported data format for user: %s, source: %s. Expected map[string]interface{} but got %T", userID, sourceID, dataSection) - glog.V(2).Infof("Data section content: %+v", dataSection) - return - } - } - - if appsMap == nil || len(appsMap) == 0 { - glog.V(3).Infof("No apps found in pending data for user: %s, source: %s", userID, sourceID) - return - } - - glog.V(2).Infof("Found %d apps in pending data for user: %s, source: %s", len(appsMap), userID, sourceID) - - // Create hydration task for each app - for appID, appData := range appsMap { - // Validate app data - if appMap, ok := appData.(map[string]interface{}); ok { - // Check if app data contains necessary raw data fields before creating task - if !h.hasRequiredRawDataFields(appMap) { - glog.Warningf("Serial pipeline, App %s (user: %s, source: %s) missing required raw data fields, skipping task creation", - appID, userID, sourceID) - continue - } - var appName = appMap["name"].(string) - // Check if task already exists for this app to avoid duplicates - if !h.hasActiveTaskForApp(userID, sourceID, appID, appName) { - // Check if app is already in render failed list - if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - glog.Warningf("Serial pipeline, App %s (user: %s, source: %s) is already in render failed list, skipping task creation", - appID, userID, sourceID) - continue - } - - // Check if app hydration is already complete before creating new task - // Extract version from app data for version comparison - version := "" - if versionValue, exists := appMap["version"]; exists && versionValue != nil { - if versionStr, ok := versionValue.(string); ok { - version = versionStr - } - } - if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { - glog.Infof("App 
hydration already complete for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - continue - } - - if len(appMap) == 0 { - glog.Warningf("Warning: Empty app data for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - continue - } - - // Create and submit task with CacheManager for unified lock strategy - var cacheManager types.CacheManagerInterface - if h.cacheManager != nil { - cacheManager = h.cacheManager - } - task := hydrationfn.NewHydrationTaskWithManager( - userID, sourceID, appID, - appMap, h.cache, cacheManager, h.settingsManager, - ) - - if err := h.EnqueueTask(task); err != nil { - glog.Errorf("Failed to enqueue hydration task for app %s (user: %s, source: %s): %v", - appID, userID, sourceID, err) - } else { - glog.V(3).Infof("Successfully enqueued hydration task for app %s (user: %s, source: %s)", - appID, userID, sourceID) - } - } else { - glog.V(3).Infof("Task already exists for app: %s (user: %s, source: %s), skipping", appID, userID, sourceID) - } - } else { - glog.V(3).Infof("Invalid app data format for app %s (user: %s, source: %s)", appID, userID, sourceID) - } - } -} - -// hasRequiredRawDataFields checks if app data contains the minimum required fields for hydration -func (h *Hydrator) hasRequiredRawDataFields(appMap map[string]interface{}) bool { - if appMap == nil { - return false - } - - // Required fields that must be present for hydration to succeed - requiredFields := []string{"id", "name", "appID"} - - // Check if at least one of the required fields exists - hasRequiredField := false - for _, field := range requiredFields { - if value, exists := appMap[field]; exists && value != nil && value != "" { - hasRequiredField = true - break - } - } - - if !hasRequiredField { - return false - } - - // Additional recommended fields that indicate this is valid app data - recommendedFields := []string{"title", "version", "description", "chartName"} - hasRecommendedField := false - 
- for _, field := range recommendedFields { - if value, exists := appMap[field]; exists && value != nil && value != "" { - hasRecommendedField = true - break - } - } - - // Log warning if missing recommended fields but still proceed - if !hasRecommendedField { - glog.V(3).Infof("Warning: App data missing recommended fields (title, version, description, chartName), but proceeding with required fields") - } - - return hasRequiredField -} - -// looksLikeAppsMap checks if a map looks like it contains app entries -func (h *Hydrator) looksLikeAppsMap(data map[string]interface{}) bool { - // Sample a few entries to see if they look like app data - sampleCount := 0 - maxSamples := 3 - - for _, value := range data { - if sampleCount >= maxSamples { - break - } - - if appMap, ok := value.(map[string]interface{}); ok { - // Check if this app data has required raw data fields - if h.hasRequiredRawDataFields(appMap) { - sampleCount++ - } else { - // If this entry doesn't have required fields, it's probably not valid app data - return false - } - } else { - // Non-map entries suggest this is not an apps map - return false - } - } - - return sampleCount > 0 -} - // SetCacheManager removed: cacheManager must be provided at NewHydrator // batchCompletionProcessor processes completed tasks in batches @@ -1868,154 +1193,6 @@ func (h *Hydrator) isAppInLatestQueue(userID, sourceID, appID, appName, version glog.V(3).Infof("DEBUG: isAppInLatestQueue returning false for appID=%s, version=%s, user=%s, source=%s", appID, version, userID, sourceID) return false } - -// ForceAddTaskFromLatestData forces creation of hydration task from latest app data, skipping isAppInLatestQueue check -// This method is exposed for external use when you need to force add a task regardless of existing state -func (h *Hydrator) ForceAddTaskFromLatestData(userID, sourceID string, latestData *types.AppInfoLatestData) error { - if !h.IsRunning() { - return fmt.Errorf("hydrator is not running") - } - - if 
latestData == nil { - return fmt.Errorf("latest data is nil") - } - - // Extract app ID from latest data - var appID string - var appName string - if latestData.RawData != nil { - appID = latestData.RawData.AppID - appName = latestData.RawData.Name - if appID == "" { - appID = latestData.RawData.ID - } - } else if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { - appID = latestData.AppInfo.AppEntry.AppID - appName = latestData.AppInfo.AppEntry.Name - if appID == "" { - appID = latestData.AppInfo.AppEntry.ID - } - } else if latestData.AppSimpleInfo != nil { - appID = latestData.AppSimpleInfo.AppID - appName = latestData.AppSimpleInfo.AppName - } - - if appID == "" { - return fmt.Errorf("cannot extract app ID from latest data") - } - - // Check if task already exists for this app to avoid duplicates - if h.hasActiveTaskForApp(userID, sourceID, appID, appName) { - glog.V(3).Infof("Task already exists for app: %s (user: %s, source: %s), skipping force add", appID, userID, sourceID) - return nil - } - - // Check if app is already in render failed list - if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - glog.V(3).Infof("App %s (user: %s, source: %s) is already in render failed list, skipping force add", - appID, userID, sourceID) - return nil - } - - // Convert latest data to map for task creation - appDataMap := h.convertLatestDataToMap(latestData) - - if len(appDataMap) == 0 { - glog.V(3).Infof("Warning: Empty app data for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - return nil - } - - // Create and submit task with CacheManager for unified lock strategy - var cacheManager types.CacheManagerInterface - if h.cacheManager != nil { - cacheManager = h.cacheManager - } - task := hydrationfn.NewHydrationTaskWithManager( - userID, sourceID, appID, - appDataMap, h.cache, cacheManager, h.settingsManager, - ) - - if err := h.EnqueueTask(task); err != nil { - glog.Errorf("Failed to enqueue force task for 
app: %s (user: %s, source: %s), error: %v", - appID, userID, sourceID, err) - return err - } - - glog.V(2).Infof("Successfully force added hydration task for app: %s (user: %s, source: %s)", - appID, userID, sourceID) - return nil -} - -// convertLatestDataToMap converts AppInfoLatestData to map for task creation -func (h *Hydrator) convertLatestDataToMap(latestData *types.AppInfoLatestData) map[string]interface{} { - if latestData == nil { - return make(map[string]interface{}) - } - - // Start with basic data - data := map[string]interface{}{ - "type": string(latestData.Type), - "timestamp": latestData.Timestamp, - "version": latestData.Version, - } - - // Add RawData if available - if latestData.RawData != nil { - rawDataMap := h.convertApplicationInfoEntryToMap(latestData.RawData) - // Merge raw data into main data map - for key, value := range rawDataMap { - data[key] = value - } - } - - // Add package information - if latestData.RawPackage != "" { - data["raw_package"] = latestData.RawPackage - } - if latestData.RenderedPackage != "" { - data["rendered_package"] = latestData.RenderedPackage - } - - // Add Values if available - if latestData.Values != nil && len(latestData.Values) > 0 { - valuesData := make([]map[string]interface{}, 0, len(latestData.Values)) - for _, value := range latestData.Values { - if value != nil { - valueMap := map[string]interface{}{ - "file_name": value.FileName, - "modify_type": string(value.ModifyType), - "modify_key": value.ModifyKey, - "modify_value": value.ModifyValue, - } - valuesData = append(valuesData, valueMap) - } - } - data["values"] = valuesData - } - - // Add AppInfo if available - if latestData.AppInfo != nil { - if latestData.AppInfo.AppEntry != nil { - appEntryMap := h.convertApplicationInfoEntryToMap(latestData.AppInfo.AppEntry) - // Merge app entry data - for key, value := range appEntryMap { - data[key] = value - } - } - if latestData.AppInfo.ImageAnalysis != nil { - data["image_analysis"] = 
latestData.AppInfo.ImageAnalysis - } - } - - // Add AppSimpleInfo if available - if latestData.AppSimpleInfo != nil { - data["app_simple_info"] = latestData.AppSimpleInfo - } - - return data -} - // isAppInRenderFailedList checks if an app already exists in the render failed list func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName string) bool { if h.cacheManager != nil { @@ -2044,17 +1221,4 @@ func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName stri } return false -} - -// ForceCheckPendingData immediately triggers checkForPendingData without waiting for the 30-second interval -// This method can be called externally to force immediate processing of pending data -func (h *Hydrator) ForceCheckPendingData() { - if !h.IsRunning() { - glog.V(3).Infof("Hydrator is not running, cannot force check pending data") - return - } - - glog.V(3).Infof("Force checking pending data triggered externally") - h.checkForPendingData() -} - +} \ No newline at end of file From 70d857b6ef54f32315596c00838a31adfb4d866c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 04:24:42 +0000 Subject: [PATCH 07/45] refactor: encapsulate cache lock operations into CacheManager methods Encapsulate all external lock operations on CacheManager into internal methods, following a hierarchical accessor pattern (users -> sources -> apps). 
New read methods (RLock internally): - GetUserIDs, GetOrCreateUserIDs, IsLocalSource - CollectAllPendingItems, FindPendingDataForApp - IsAppInLatestQueue, IsAppInRenderFailedList - SnapshotSourcePending, HasSourceData, IsAppInstalled - GetSourceOthersHash, ListActiveUsers New write methods (Lock internally): - SetUserHash, RemoveFromPendingList - UpsertLatestAndRemovePending, UpdateSourceOthers - RemoveAppFromAllSources, RemoveDelistedApps - CopyPendingVersionHistory Removed public methods: - Lock/Unlock/TryLock/RLock/RUnlock/TryRLock (lock internalized) - GetCache (no direct cache access) - GetUserDataNoLock, GetAllUsersDataWithFallback, GetUserDataWithFallback - GetLockStats, DumpLockInfo (made private) Updated CacheManagerInterface to expose semantic operations instead of lock primitives. All 13 external files updated to use the new API. Existing read methods (GetAllUsersData, GetUserData, GetSourceData) are preserved as the hierarchical accessor layer. Co-authored-by: aby913 --- internal/v2/appinfo/appinfomodule.go | 35 +- internal/v2/appinfo/cache.go | 559 ++++++++++++++++-- internal/v2/appinfo/datawatcher_app.go | 224 ++----- internal/v2/appinfo/diagnostic.go | 15 +- internal/v2/appinfo/hydration.go | 179 +----- .../v2/appinfo/hydrationfn/task_for_api.go | 61 +- internal/v2/appinfo/pipeline.go | 29 +- .../v2/appinfo/status_correction_check.go | 4 +- internal/v2/appinfo/syncer.go | 55 +- .../v2/appinfo/syncerfn/data_fetch_step.go | 100 +--- .../v2/appinfo/syncerfn/detail_fetch_step.go | 182 +----- .../appinfo/syncerfn/hash_comparison_step.go | 13 +- internal/v2/types/cache_manager.go | 34 +- pkg/v2/api/app.go | 14 +- pkg/v2/api/system.go | 22 +- 15 files changed, 652 insertions(+), 874 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index 1557f27..cd7e59f 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -772,12 +772,11 @@ func (m *AppInfoModule) 
correctCacheWithChartRepo() error { return fmt.Errorf("cache manager not available") } - m.cacheManager.mutex.Lock() - defer m.cacheManager.mutex.Unlock() - removedCount := 0 - for userID, userData := range m.cacheManager.cache.Users { + // Build the set of delisted app IDs (apps NOT in validApps) + delistedAppIDs := make(map[string]bool) + allUsersData := m.cacheManager.GetAllUsersData() + for _, userData := range allUsersData { for sourceID, sourceData := range userData.Sources { - newLatest := sourceData.AppInfoLatest[:0] for _, app := range sourceData.AppInfoLatest { var appID string if app != nil && app.RawData != nil { @@ -789,22 +788,19 @@ func (m *AppInfoModule) correctCacheWithChartRepo() error { appID = app.RawData.Name } } - if appID != "" && validApps[sourceID] != nil { - if _, ok := validApps[sourceID][appID]; ok { - newLatest = append(newLatest, app) - } else { - removedCount++ - glog.V(3).Infof("Removed app from cache: user=%s source=%s appID=%s", userID, sourceID, appID) - } - } else { - // If appID is empty, treat as invalid and remove - removedCount++ - glog.V(3).Infof("Removed app from cache (empty appID): user=%s source=%s", userID, sourceID) + if appID == "" { + continue + } + if validApps[sourceID] == nil { + delistedAppIDs[appID] = true + } else if _, ok := validApps[sourceID][appID]; !ok { + delistedAppIDs[appID] = true } } - sourceData.AppInfoLatest = newLatest } } + + removedCount := m.cacheManager.RemoveDelistedApps(delistedAppIDs) glog.V(2).Infof("Cache correction finished, removed %d apps not in chart repo", removedCount) return nil } @@ -1372,15 +1368,14 @@ func (m *AppInfoModule) GetInvalidDataReport() map[string]interface{} { }, } - m.cacheManager.mutex.RLock() - defer m.cacheManager.mutex.RUnlock() + allUsersForReport := m.cacheManager.GetAllUsersData() totalUsers := 0 totalSources := 0 totalPendingData := 0 totalInvalidData := 0 - for userID, userData := range m.cacheManager.cache.Users { + for userID, userData := range 
allUsersForReport { totalUsers++ userReport := map[string]interface{}{ "sources": make(map[string]interface{}), diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 75592d9..aac3a97 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -75,79 +75,535 @@ func (cm *CacheManager) startLockWatchdog(tag string) func() { } } -// Lock acquires the cache manager's write lock -func (cm *CacheManager) Lock() { +// GetUserIDs returns a list of all user IDs in the cache +func (cm *CacheManager) GetUserIDs() []string { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + if cm.cache == nil { + return nil + } + + ids := make([]string, 0, len(cm.cache.Users)) + for id := range cm.cache.Users { + ids = append(ids, id) + } + return ids +} + +// GetOrCreateUserIDs returns all user IDs; if none exist, creates a default user first. +func (cm *CacheManager) GetOrCreateUserIDs(defaultUserID string) []string { + cm.mutex.RLock() + ids := make([]string, 0, len(cm.cache.Users)) + for id := range cm.cache.Users { + ids = append(ids, id) + } + cm.mutex.RUnlock() + + if len(ids) > 0 { + return ids + } + cm.mutex.Lock() + defer cm.mutex.Unlock() + + // Double-check after acquiring write lock + if len(cm.cache.Users) > 0 { + for id := range cm.cache.Users { + ids = append(ids, id) + } + return ids + } + + cm.cache.Users[defaultUserID] = NewUserDataEx(defaultUserID) + glog.V(3).Infof("No existing users found, created user %s as fallback", defaultUserID) + return []string{defaultUserID} } -// Unlock releases the cache manager's write lock -func (cm *CacheManager) Unlock() { - cm.mutex.Unlock() +// IsLocalSource returns true if the given source is of local type. 
+func (cm *CacheManager) IsLocalSource(userID, sourceID string) bool { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData, exists := cm.cache.Users[userID] + if !exists { + return false + } + sourceData, exists := userData.Sources[sourceID] + if !exists { + return false + } + return sourceData.Type == types.SourceDataTypeLocal } -// TryLock attempts to acquire the cache manager's write lock without blocking -// Returns true if lock acquired, false if would block -func (cm *CacheManager) TryLock() bool { - return cm.mutex.TryLock() +// SetUserHash atomically sets the hash for a user. +func (cm *CacheManager) SetUserHash(userID, hash string) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + if userData, exists := cm.cache.Users[userID]; exists { + userData.Hash = hash + } } -// RLock acquires the cache manager's read lock -func (cm *CacheManager) RLock() { - cm.mutex.RLock() +// RemoveFromPendingList removes an app from the pending list for the given user/source. +func (cm *CacheManager) RemoveFromPendingList(userID, sourceID, appID string) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + userData, ok := cm.cache.Users[userID] + if !ok { + return + } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return + } + + newSlice := make([]*types.AppInfoLatestPendingData, 0, len(sourceData.AppInfoLatestPending)) + for _, p := range sourceData.AppInfoLatestPending { + if p != nil && p.RawData != nil && + (p.RawData.ID == appID || p.RawData.AppID == appID || p.RawData.Name == appID) { + continue + } + newSlice = append(newSlice, p) + } + sourceData.AppInfoLatestPending = newSlice } -// RUnlock releases the cache manager's read lock -func (cm *CacheManager) RUnlock() { - cm.mutex.RUnlock() +// UpsertLatestAndRemovePending inserts or replaces an app in AppInfoLatest and removes +// it from AppInfoLatestPending. Returns the old version (if replaced), whether it was +// a replacement, and whether the user/source existed. 
+func (cm *CacheManager) UpsertLatestAndRemovePending( + userID, sourceID string, + latestData *types.AppInfoLatestData, + appID, appName string, +) (oldVersion string, replaced bool, ok bool) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + userData, exists := cm.cache.Users[userID] + if !exists { + return "", false, false + } + sourceData, exists := userData.Sources[sourceID] + if !exists { + return "", false, false + } + + // Find existing app by name + existingIndex := -1 + for i, app := range sourceData.AppInfoLatest { + if app == nil { + continue + } + name := "" + if app.RawData != nil { + name = app.RawData.Name + } else if app.AppInfo != nil && app.AppInfo.AppEntry != nil { + name = app.AppInfo.AppEntry.Name + } else if app.AppSimpleInfo != nil { + name = app.AppSimpleInfo.AppName + } + if name == appName { + existingIndex = i + break + } + } + + if existingIndex >= 0 { + old := sourceData.AppInfoLatest[existingIndex] + if old.AppInfo != nil && old.AppInfo.AppEntry != nil { + oldVersion = old.AppInfo.AppEntry.Version + } + sourceData.AppInfoLatest[existingIndex] = latestData + replaced = true + } else { + sourceData.AppInfoLatest = append(sourceData.AppInfoLatest, latestData) + } + + // Remove from pending + newPending := make([]*types.AppInfoLatestPendingData, 0, len(sourceData.AppInfoLatestPending)) + for _, p := range sourceData.AppInfoLatestPending { + pID := "" + if p != nil && p.RawData != nil { + pID = p.RawData.AppID + if pID == "" { + pID = p.RawData.ID + } + if pID == "" { + pID = p.RawData.Name + } + } + if pID != appID { + newPending = append(newPending, p) + } + } + sourceData.AppInfoLatestPending = newPending + + return oldVersion, replaced, true } -// TryRLock attempts to acquire the cache manager's read lock without blocking -// Returns true if lock acquired, false if would block -func (cm *CacheManager) TryRLock() bool { - return cm.mutex.TryRLock() +// UpdateSourceOthers updates the Others data for a given sourceID across all users. 
+// If a user or source doesn't exist, it is created. +func (cm *CacheManager) UpdateSourceOthers(sourceID string, others *types.Others) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + if len(cm.cache.Users) == 0 { + systemUserID := "system" + cm.cache.Users[systemUserID] = NewUserDataEx(systemUserID) + glog.V(3).Infof("No existing users found, created system user as fallback") + } + + for userID, userData := range cm.cache.Users { + if userData.Sources == nil { + userData.Sources = make(map[string]*SourceData) + } + if userData.Sources[sourceID] == nil { + userData.Sources[sourceID] = NewSourceData() + } + userData.Sources[sourceID].Others = others + glog.V(3).Infof("Updated Others data in cache for user %s, source %s", userID, sourceID) + } } -func (cm *CacheManager) GetUserDataNoLock(userID string) *UserData { - if cm.cache == nil { - return nil +// RemoveAppFromAllSources removes an app (by name) from AppInfoLatest and +// AppInfoLatestPending across all users for the given sourceID. Returns the +// total number of users affected. 
+func (cm *CacheManager) RemoveAppFromAllSources(appName, sourceID string) int { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + affected := 0 + for _, userData := range cm.cache.Users { + sourceData, exists := userData.Sources[sourceID] + if !exists { + continue + } + + origLatest := len(sourceData.AppInfoLatest) + origPending := len(sourceData.AppInfoLatestPending) + + newLatest := make([]*types.AppInfoLatestData, 0, origLatest) + for _, app := range sourceData.AppInfoLatest { + if app == nil || app.RawData == nil || app.RawData.Name != appName { + newLatest = append(newLatest, app) + } + } + + newPending := make([]*types.AppInfoLatestPendingData, 0, origPending) + for _, app := range sourceData.AppInfoLatestPending { + if app == nil || app.RawData == nil || app.RawData.Name != appName { + newPending = append(newPending, app) + } + } + + if len(newLatest) != origLatest || len(newPending) != origPending { + sourceData.AppInfoLatest = newLatest + sourceData.AppInfoLatestPending = newPending + affected++ + } } + return affected +} - return cm.cache.Users[userID] +// RemoveDelistedApps removes apps whose ID is in the provided set from +// AppInfoLatest across all users and sources. Returns the total removal count. 
+func (cm *CacheManager) RemoveDelistedApps(delistedAppIDs map[string]bool) int { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + removedCount := 0 + for userID, userData := range cm.cache.Users { + for sourceID, sourceData := range userData.Sources { + newLatest := sourceData.AppInfoLatest[:0] + for _, app := range sourceData.AppInfoLatest { + var appID string + if app != nil && app.RawData != nil { + if app.RawData.ID != "" { + appID = app.RawData.ID + } else if app.RawData.AppID != "" { + appID = app.RawData.AppID + } else if app.RawData.Name != "" { + appID = app.RawData.Name + } + } + if delistedAppIDs[appID] { + removedCount++ + glog.V(3).Infof("Removing delisted app %s from user %s source %s", appID, userID, sourceID) + } else { + newLatest = append(newLatest, app) + } + } + sourceData.AppInfoLatest = newLatest + } + } + return removedCount } -// GetUserDataWithFallback retrieves user data with fallback mechanism -// Uses TryRLock to avoid blocking - returns nil if lock is not available immediately -func (cm *CacheManager) GetUserDataWithFallback(userID string) *UserData { +// CopyPendingVersionHistory finds the pending data for the given app and copies +// its VersionHistory and AppLabels into the target ApplicationInfoEntry under write lock. +// It also overwrites the pending entry with the supplied latestData fields. 
+func (cm *CacheManager) CopyPendingVersionHistory( + userID, sourceID, appID, appName string, + latestData *types.AppInfoLatestData, +) error { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + userData, ok := cm.cache.Users[userID] + if !ok { + return fmt.Errorf("user %s not found", userID) + } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return fmt.Errorf("source %s not found for user %s", sourceID, userID) + } + + // Find the pending data + var pendingData *types.AppInfoLatestPendingData + for _, p := range sourceData.AppInfoLatestPending { + if p == nil || p.RawData == nil { + continue + } + if p.RawData.Name == appName || p.RawData.AppID == appID || p.RawData.ID == appID { + pendingData = p + break + } + } + if pendingData == nil { + return fmt.Errorf("pendingData not found for user=%s, source=%s, app=%s, appName=%s", userID, sourceID, appID, appName) + } + + // Copy version history from pending to latest + if latestData.RawData != nil && pendingData.RawData != nil { + latestData.RawData.VersionHistory = pendingData.RawData.VersionHistory + if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { + latestData.AppInfo.AppEntry.VersionHistory = pendingData.RawData.VersionHistory + } + // Preserve appLabels from pendingData if latest doesn't have them + if len(pendingData.RawData.AppLabels) > 0 && len(latestData.RawData.AppLabels) == 0 { + latestData.RawData.AppLabels = pendingData.RawData.AppLabels + if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { + latestData.AppInfo.AppEntry.AppLabels = pendingData.RawData.AppLabels + } + } + } + + // Overwrite pending entry with latest data fields + pendingData.Type = latestData.Type + pendingData.Timestamp = latestData.Timestamp + pendingData.Version = latestData.Version + pendingData.RawData = latestData.RawData + pendingData.RawPackage = latestData.RawPackage + pendingData.Values = latestData.Values + pendingData.AppInfo = latestData.AppInfo + pendingData.RenderedPackage = 
latestData.RenderedPackage + pendingData.AppSimpleInfo = latestData.AppSimpleInfo + + return nil +} + +// FindPendingDataForApp finds a pending data entry by appID in the given user/source. +func (cm *CacheManager) FindPendingDataForApp(userID, sourceID, appID string) *types.AppInfoLatestPendingData { cm.mutex.RLock() defer cm.mutex.RUnlock() - if cm.cache == nil { + userData, ok := cm.cache.Users[userID] + if !ok { return nil } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return nil + } + for _, p := range sourceData.AppInfoLatestPending { + if p != nil && p.RawData != nil && + (p.RawData.ID == appID || p.RawData.AppID == appID || p.RawData.Name == appID) { + return p + } + } + return nil +} - return cm.cache.Users[userID] +// IsAppInLatestQueue checks if an app (by ID) with a matching version exists in AppInfoLatest. +func (cm *CacheManager) IsAppInLatestQueue(userID, sourceID, appID, version string) bool { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData, ok := cm.cache.Users[userID] + if !ok { + return false + } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return false + } + + for _, ld := range sourceData.AppInfoLatest { + if ld == nil { + continue + } + if ld.RawData != nil { + if ld.RawData.ID == appID || ld.RawData.AppID == appID || ld.RawData.Name == appID { + if version != "" && ld.RawData.Version != version { + continue + } + return true + } + } + if ld.AppInfo != nil && ld.AppInfo.AppEntry != nil { + if ld.AppInfo.AppEntry.ID == appID || ld.AppInfo.AppEntry.AppID == appID || ld.AppInfo.AppEntry.Name == appID { + if version != "" && ld.AppInfo.AppEntry.Version != version { + continue + } + return true + } + } + } + return false } -// GetAllUsersDataWithFallback returns all users data with fallback mechanism -// Uses TryRLock to avoid blocking - returns empty map if lock is not available immediately -func (cm *CacheManager) GetAllUsersDataWithFallback() map[string]*UserData { +// IsAppInRenderFailedList checks if an 
app exists in the render failed list. +func (cm *CacheManager) IsAppInRenderFailedList(userID, sourceID, appID string) bool { cm.mutex.RLock() defer cm.mutex.RUnlock() - if cm.cache == nil { - return make(map[string]*UserData) + userData, ok := cm.cache.Users[userID] + if !ok { + return false + } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return false } + for _, fd := range sourceData.AppRenderFailed { + if fd.RawData != nil && + (fd.RawData.ID == appID || fd.RawData.AppID == appID || fd.RawData.Name == appID) { + return true + } + } + return false +} - result := make(map[string]*UserData) +// HasSourceData returns true if any user has non-empty AppInfoLatest or +// AppInfoLatestPending data for the given sourceID. +func (cm *CacheManager) HasSourceData(sourceID string) bool { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + for _, userData := range cm.cache.Users { + if sourceData, exists := userData.Sources[sourceID]; exists { + if len(sourceData.AppInfoLatestPending) > 0 || len(sourceData.AppInfoLatest) > 0 { + return true + } + } + } + return false +} + +// IsAppInstalled returns true if any user has the named app in a non-uninstalled +// state in AppStateLatest for the given sourceID. +func (cm *CacheManager) IsAppInstalled(sourceID, appName string) bool { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + for _, userData := range cm.cache.Users { + if sourceData, ok := userData.Sources[sourceID]; ok { + for _, appState := range sourceData.AppStateLatest { + if appState != nil && appState.Status.Name == appName && appState.Status.State != "uninstalled" { + return true + } + } + } + } + return false +} + +// GetSourceOthersHash returns the Others.Hash stored for the given sourceID +// in the first user that has a valid hash. 
+func (cm *CacheManager) GetSourceOthersHash(sourceID string) string { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + for _, userData := range cm.cache.Users { + if sourceData, exists := userData.Sources[sourceID]; exists { + if sourceData.Others != nil && sourceData.Others.Hash != "" { + return sourceData.Others.Hash + } + } + } + return "" +} + +// ListActiveUsers returns information about all active (existing) users. +func (cm *CacheManager) ListActiveUsers() []map[string]string { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + var usersInfo []map[string]string + for _, v := range cm.cache.Users { + if v.UserInfo != nil && v.UserInfo.Exists { + ui := map[string]string{ + "id": v.UserInfo.Id, + "name": v.UserInfo.Name, + "role": v.UserInfo.Role, + "status": v.UserInfo.Status, + } + usersInfo = append(usersInfo, ui) + } + } + return usersInfo +} + +// CollectAllPendingItems returns all non-nil pending items across all users and sources. +type PendingItem struct { + UserID string + SourceID string + Pending *types.AppInfoLatestPendingData +} + +func (cm *CacheManager) CollectAllPendingItems() []PendingItem { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + var items []PendingItem for userID, userData := range cm.cache.Users { - result[userID] = userData + for sourceID, sourceData := range userData.Sources { + for _, pd := range sourceData.AppInfoLatestPending { + if pd != nil { + items = append(items, PendingItem{userID, sourceID, pd}) + } + } + } } - return result + return items } -// GetCache returns the underlying cache data -func (cm *CacheManager) GetCache() *CacheData { - return cm.cache +// SnapshotSourcePending returns shallow copies of the pending and latest slices +// for the given user/source, safe for iteration outside the lock. 
+func (cm *CacheManager) SnapshotSourcePending(userID, sourceID string) ( + pending []*types.AppInfoLatestPendingData, + latest []*types.AppInfoLatestData, +) { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData, exists := cm.cache.Users[userID] + if !exists { + return nil, nil + } + sourceData, exists := userData.Sources[sourceID] + if !exists { + return nil, nil + } + + pending = make([]*types.AppInfoLatestPendingData, len(sourceData.AppInfoLatestPending)) + copy(pending, sourceData.AppInfoLatestPending) + latest = make([]*types.AppInfoLatestData, len(sourceData.AppInfoLatest)) + copy(latest, sourceData.AppInfoLatest) + return pending, latest } // SyncRequest represents a request to sync data to Redis @@ -1737,14 +2193,10 @@ func (cm *CacheManager) enhanceAppStateDataWithUrls(data map[string]interface{}, return enhancedData } -// GetLockStats returns current lock statistics for monitoring -func (cm *CacheManager) GetLockStats() map[string]interface{} { - glog.V(4).Infof("[LOCK] cm.lockStats.Lock() GetLockStats Start") +// getLockStats returns current lock statistics for internal monitoring +func (cm *CacheManager) getLockStats() map[string]interface{} { cm.lockStats.Lock() - defer func() { - cm.lockStats.Unlock() - glog.V(4).Infof("[LOCK] cm.lockStats.Unlock() GetLockStats End") - }() + defer cm.lockStats.Unlock() stats := make(map[string]interface{}) stats["last_lock_time"] = cm.lockStats.lastLockTime @@ -1753,7 +2205,6 @@ func (cm *CacheManager) GetLockStats() map[string]interface{} { stats["lock_count"] = cm.lockStats.lockCount stats["unlock_count"] = cm.lockStats.unlockCount - // Check for potential lock issues if cm.lockStats.lockCount > cm.lockStats.unlockCount { stats["lock_imbalance"] = cm.lockStats.lockCount - cm.lockStats.unlockCount stats["potential_deadlock"] = true @@ -1762,7 +2213,6 @@ func (cm *CacheManager) GetLockStats() map[string]interface{} { stats["potential_deadlock"] = false } - // Check if lock has been held for too long if 
!cm.lockStats.lastLockTime.IsZero() && cm.lockStats.lockDuration > 30*time.Second { stats["long_lock_duration"] = true stats["current_lock_duration"] = time.Since(cm.lockStats.lastLockTime) @@ -1773,15 +2223,12 @@ func (cm *CacheManager) GetLockStats() map[string]interface{} { return stats } -// DumpLockInfo prints lock stats and all goroutine stacks for diagnosing lock holders -func (cm *CacheManager) DumpLockInfo(reason string) { +// dumpLockInfo prints lock stats and all goroutine stacks for diagnosing lock holders +func (cm *CacheManager) dumpLockInfo(reason string) { glog.V(4).Infof("LOCK DIAG: reason=%s", reason) - // Print current lock stats snapshot - stats := cm.GetLockStats() + stats := cm.getLockStats() glog.V(4).Infof("LOCK DIAG: stats=%v", stats) - // Dump all goroutine stacks to identify who might be holding the lock - // Note: This is safe but can be large; only used on timeouts. buf := make([]byte, 1<<20) n := runtime.Stack(buf, true) glog.V(4).Infof("LOCK DIAG: goroutine dump (%d bytes)\n%s", n, string(buf[:n])) @@ -2224,8 +2671,8 @@ func (cm *CacheManager) ListUsers() { userList = append(userList, user) } - cm.Lock() - defer cm.Unlock() + cm.mutex.Lock() + defer cm.mutex.Unlock() if len(cm.cache.Users) == 0 { glog.V(2).Info("watch user list, cache user not exists") diff --git a/internal/v2/appinfo/datawatcher_app.go b/internal/v2/appinfo/datawatcher_app.go index 13d38bb..afedb63 100644 --- a/internal/v2/appinfo/datawatcher_app.go +++ b/internal/v2/appinfo/datawatcher_app.go @@ -143,8 +143,7 @@ func (dw *DataWatcher) processCompletedApps() { // Get all users data from cache manager with timeout var allUsersData map[string]*types.UserData - // Use fallback method with TryRLock to avoid blocking - allUsersData = dw.cacheManager.GetAllUsersDataWithFallback() + allUsersData = dw.cacheManager.GetAllUsersData() if len(allUsersData) == 0 { glog.Infof("DataWatcher: No users data found, processing cycle completed") @@ -397,9 +396,7 @@ func (dw *DataWatcher) 
calculateAndSetUserHashDirect(userID string, userData *ty glog.V(2).Infof("DataWatcher: Hash changed for user %s: %s -> %s", userID, currentHash, newHash) - dw.cacheManager.mutex.Lock() - originalUserData.Hash = newHash - dw.cacheManager.mutex.Unlock() + dw.cacheManager.SetUserHash(userID, newHash) glog.V(3).Infof("DataWatcher: Hash updated for user %s", userID) @@ -446,34 +443,8 @@ func (dw *DataWatcher) processSourceData(userID, sourceID string, sourceData *ty return 0, 0 } - var pendingApps []*types.AppInfoLatestPendingData - var appInfoLatest []*types.AppInfoLatestData - // Step 1: Quick check and data copy with minimal lock time - func() { - glog.V(3).Info("[LOCK] dw.cacheManager.mutex.TryRLock() @660 Start") - if !dw.cacheManager.mutex.TryRLock() { - glog.V(3).Infof("[TryRLock] processSourceData: Read lock not available for user: %s, source: %s, skipping", userID, sourceID) - return - } - defer func() { - dw.cacheManager.mutex.RUnlock() - glog.V(3).Infof("[LOCK] dw.cacheManager.mutex.RUnlock() @660 End") - }() - - // Quick check - if no pending apps, exit early - if len(sourceData.AppInfoLatestPending) == 0 { - return - } - - // Copy references to pending apps for processing - pendingApps = make([]*types.AppInfoLatestPendingData, len(sourceData.AppInfoLatestPending)) - copy(pendingApps, sourceData.AppInfoLatestPending) - - // Copy references to existing AppInfoLatest - appInfoLatest = make([]*types.AppInfoLatestData, len(sourceData.AppInfoLatest)) - copy(appInfoLatest, sourceData.AppInfoLatest) - }() + pendingApps, _ := dw.cacheManager.SnapshotSourcePending(userID, sourceID) // Early exit if no pending apps if len(pendingApps) == 0 { @@ -513,135 +484,38 @@ func (dw *DataWatcher) processSourceData(userID, sourceID string, sourceData *ty } glog.Infof("DataWatcher: user=%s source=%s completed=%d/%d apps=[%s]", userID, sourceID, len(completedApps), len(pendingApps), strings.Join(completedIDs, ",")) - // Step 3: Try to acquire write lock non-blocking and move 
completed apps - lockStartTime := time.Now() - - // Try to acquire write lock non-blocking with cancellation support - lockAcquired := make(chan bool, 1) - lockCancel := make(chan bool, 1) - - go func() { - glog.V(3).Info("[LOCK] dw.cacheManager.mutex.TryLock() @716 Start") - if !dw.cacheManager.mutex.TryLock() { - glog.Warningf("DataWatcher: Write lock not available for user %s, source %s, skipping app move", userID, sourceID) - return - } - defer func() { - dw.cacheManager.mutex.Unlock() - glog.V(3).Info("[LOCK] dw.cacheManager.mutex.Unlock() @725 Start") - }() - - // Check if cancelled before sending signal - select { - case <-lockCancel: - glog.V(3).Infof("DataWatcher: Write lock acquisition cancelled for user=%s, source=%s", userID, sourceID) - return - default: - } - - glog.V(3).Info("[LOCK] dw.cacheManager.mutex.Lock() @716 Success") - - // Send signal and wait for processing - select { - case lockAcquired <- true: - // Successfully sent signal, wait for cancellation - <-lockCancel - case <-lockCancel: - glog.V(3).Infof("DataWatcher: Write lock acquisition cancelled before signal for user=%s, source=%s", userID, sourceID) + // Step 3: Move completed apps from pending to latest via CacheManager + movedCount := int64(0) + for _, completedApp := range completedApps { + latestData := dw.convertPendingToLatest(completedApp) + if latestData == nil { + continue } - }() - - // Use a short timeout to avoid blocking too long - select { - case <-lockAcquired: - glog.V(3).Infof("DataWatcher: Write lock acquired for user=%s, source=%s", userID, sourceID) - - defer func() { - totalLockTime := time.Since(lockStartTime) - glog.V(3).Infof("DataWatcher: Write lock released after %v for user=%s, source=%s", totalLockTime, userID, sourceID) - // Cancel the goroutine to release the lock - close(lockCancel) - }() + appID := dw.getAppID(completedApp) + appName := dw.getAppName(completedApp) - // Move completed apps from pending to latest - movedCount := int64(0) - for _, 
completedApp := range completedApps { - // Convert to AppInfoLatestData - latestData := dw.convertPendingToLatest(completedApp) - if latestData != nil { - // Check if app with same name already exists in AppInfoLatest - appName := dw.getAppName(completedApp) - existingIndex := -1 - - // Find existing app with same name - for i, existingApp := range sourceData.AppInfoLatest { - if existingApp != nil { - existingAppName := dw.getAppNameFromLatest(existingApp) - if existingAppName == appName { - existingIndex = i - break - } - } - } - - if existingIndex >= 0 { - - if latestData.AppInfo.AppEntry.Version != sourceData.AppInfoLatest[existingIndex].AppInfo.AppEntry.Version { - // Send system notification for new app ready - dw.sendNewAppReadyNotification(userID, completedApp, sourceID) - glog.V(3).Infof("DataWatcher: Sent system notification for new app ready: %s", appName) - } - - // Replace existing app with same name - sourceData.AppInfoLatest[existingIndex] = latestData - glog.V(3).Infof("DataWatcher: Replaced existing app with same name: %s (index: %d)", appName, existingIndex) - - } else { - // Add new app if no existing app with same name - sourceData.AppInfoLatest = append(sourceData.AppInfoLatest, latestData) - glog.V(2).Infof("DataWatcher: Added new app to latest: %s", appName) - // Send system notification for new app ready - dw.sendNewAppReadyNotification(userID, completedApp, sourceID) - } - - movedCount++ - - } + oldVersion, replaced, ok := dw.cacheManager.UpsertLatestAndRemovePending(userID, sourceID, latestData, appID, appName) + if !ok { + continue } - // Remove completed apps from pending list - if movedCount > 0 { - newPendingList := make([]*types.AppInfoLatestPendingData, 0, len(sourceData.AppInfoLatestPending)-int(movedCount)) - completedAppIDs := make(map[string]bool) - - // Create a map of completed app IDs for efficient lookup - for _, completedApp := range completedApps { - appID := dw.getAppID(completedApp) - if appID != "" { - 
completedAppIDs[appID] = true - } + if replaced { + newVersion := "" + if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { + newVersion = latestData.AppInfo.AppEntry.Version } - - // Filter out completed apps from pending list - for _, pendingApp := range sourceData.AppInfoLatestPending { - appID := dw.getAppID(pendingApp) - if !completedAppIDs[appID] { - newPendingList = append(newPendingList, pendingApp) - } + if oldVersion != newVersion { + dw.sendNewAppReadyNotification(userID, completedApp, sourceID) } - - sourceData.AppInfoLatestPending = newPendingList - glog.Infof("DataWatcher: Updated pending list: %d -> %d apps for user=%s, source=%s", - len(sourceData.AppInfoLatestPending)+int(movedCount), len(sourceData.AppInfoLatestPending), userID, sourceID) + glog.V(3).Infof("DataWatcher: Replaced existing app: %s", appName) + } else { + glog.V(2).Infof("DataWatcher: Added new app to latest: %s", appName) + dw.sendNewAppReadyNotification(userID, completedApp, sourceID) } - - return int64(len(pendingApps)), movedCount - - case <-time.After(2 * time.Second): - close(lockCancel) // Cancel the goroutine to release the lock - glog.V(3).Infof("DataWatcher: Skipping write lock acquisition for user=%s, source=%s (timeout after 2s) - will retry in next cycle", userID, sourceID) - return int64(len(pendingApps)), 0 + movedCount++ } + + return int64(len(pendingApps)), movedCount } // isAppHydrationCompletedWithTimeout checks if app hydration is completed with timeout protection @@ -1197,55 +1071,25 @@ func (dw *DataWatcher) ProcessSingleAppToLatest(userID, sourceID string, pending appName := dw.getAppName(pendingApp) glog.V(2).Infof("Pipeline: ProcessSingleAppToLatest user=%s, source=%s, id=%s, name=%s", userID, sourceID, appID, appName) - dw.cacheManager.mutex.Lock() - defer dw.cacheManager.mutex.Unlock() - - userData, userExists := dw.cacheManager.cache.Users[userID] - if !userExists { - return false - } - sourceData, sourceExists := userData.Sources[sourceID] 
- if !sourceExists { + oldVersion, replaced, ok := dw.cacheManager.UpsertLatestAndRemovePending(userID, sourceID, latestData, appID, appName) + if !ok { return false } - // Check if app with same name already exists in AppInfoLatest - existingIndex := -1 - for i, existingApp := range sourceData.AppInfoLatest { - if existingApp != nil { - existingAppName := dw.getAppNameFromLatest(existingApp) - if existingAppName == appName { - existingIndex = i - break - } + if replaced { + newVersion := "" + if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { + newVersion = latestData.AppInfo.AppEntry.Version } - } - - if existingIndex >= 0 { - if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil && - sourceData.AppInfoLatest[existingIndex].AppInfo != nil && - sourceData.AppInfoLatest[existingIndex].AppInfo.AppEntry != nil && - latestData.AppInfo.AppEntry.Version != sourceData.AppInfoLatest[existingIndex].AppInfo.AppEntry.Version { + if oldVersion != newVersion { dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) } - sourceData.AppInfoLatest[existingIndex] = latestData glog.V(2).Infof("ProcessSingleAppToLatest: replaced existing app %s (user=%s, source=%s)", appName, userID, sourceID) } else { - sourceData.AppInfoLatest = append(sourceData.AppInfoLatest, latestData) glog.V(2).Infof("ProcessSingleAppToLatest: added new app %s (user=%s, source=%s)", appName, userID, sourceID) dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) } - // Remove from pending list - newPendingList := make([]*types.AppInfoLatestPendingData, 0, len(sourceData.AppInfoLatestPending)) - for _, p := range sourceData.AppInfoLatestPending { - pID := dw.getAppID(p) - if pID != appID { - newPendingList = append(newPendingList, p) - } - } - sourceData.AppInfoLatestPending = newPendingList - atomic.AddInt64(&dw.totalAppsMoved, 1) glog.Infof("ProcessSingleAppToLatest: successfully moved app %s to Latest (user=%s, source=%s)", appName, userID, sourceID) return true 
diff --git a/internal/v2/appinfo/diagnostic.go b/internal/v2/appinfo/diagnostic.go index 476e968..6cbae2c 100644 --- a/internal/v2/appinfo/diagnostic.go +++ b/internal/v2/appinfo/diagnostic.go @@ -45,10 +45,7 @@ func (cm *CacheManager) DiagnoseCacheAndRedis() error { glog.Infof("Redis Keys Found: %d", len(redisKeys)) // Analyze cache state - if !cm.mutex.TryRLock() { - glog.Warningf("Diagnostic: CacheManager read lock not available, skipping cache analysis") - return fmt.Errorf("read lock not available") - } + cm.mutex.RLock() userCount := len(cm.cache.Users) totalSources := 0 issues := 0 @@ -143,10 +140,7 @@ func (cm *CacheManager) ForceReloadFromRedis() error { return err } - if !cm.mutex.TryLock() { - glog.Warningf("Diagnostic: Write lock not available for cache reload, skipping") - return fmt.Errorf("write lock not available") - } + cm.mutex.Lock() cm.cache = cache cm.mutex.Unlock() @@ -156,10 +150,7 @@ func (cm *CacheManager) ForceReloadFromRedis() error { // ValidateSourceData validates source data integrity func (cm *CacheManager) ValidateSourceData(userID, sourceID string) (*SourceAnalysis, error) { - if !cm.mutex.TryRLock() { - glog.Warningf("Diagnostic.ValidateSourceData: CacheManager read lock not available for user %s, source %s", userID, sourceID) - return nil, fmt.Errorf("read lock not available") - } + cm.mutex.RLock() defer cm.mutex.RUnlock() userData, exists := cm.cache.Users[userID] diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index dbf1129..a40c3b5 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -575,36 +575,7 @@ func (h *Hydrator) moveTaskToRenderFailed(task *hydrationfn.HydrationTask, failu return } - var pendingData *types.AppInfoLatestPendingData - if h.cacheManager != nil { - h.cacheManager.mutex.RLock() - userData, userExists := h.cache.Users[task.UserID] - if !userExists { - h.cacheManager.mutex.RUnlock() - glog.Warningf("Warning: User data not found for task: %s, 
user: %s", task.ID, task.UserID) - return - } - - sourceData, sourceExists := userData.Sources[task.SourceID] - if !sourceExists { - h.cacheManager.mutex.RUnlock() - glog.V(3).Infof("Warning: Source data not found for task: %s, user: %s, source: %s", task.ID, task.UserID, task.SourceID) - return - } - - // Find the pending data for this app - for _, pending := range sourceData.AppInfoLatestPending { - if pending.RawData != nil && - (pending.RawData.ID == task.AppID || pending.RawData.AppID == task.AppID || pending.RawData.Name == task.AppID) { - pendingData = pending - break - } - } - h.cacheManager.mutex.RUnlock() - } else { - glog.V(3).Infof("Warning: CacheManager not available for moveTaskToRenderFailed") - return - } + pendingData := h.cacheManager.FindPendingDataForApp(task.UserID, task.SourceID, task.AppID) if pendingData == nil { glog.V(3).Infof("Warning: Pending data not found for task: %s, app: %s", task.ID, task.AppID) @@ -635,50 +606,8 @@ func (h *Hydrator) removeFromPendingList(userID, sourceID, appID, appName, appVe glog.V(3).Infof("Warning: CacheManager not available for removeFromPendingList") return } - - h.cacheManager.mutex.Lock() - defer h.cacheManager.mutex.Unlock() - - userData, userExists := h.cache.Users[userID] - if !userExists { - return - } - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - return - } - removeIdx := -1 - for i, pending := range sourceData.AppInfoLatestPending { - if pending != nil && pending.RawData != nil && - (pending.RawData.ID == appID || pending.RawData.AppID == appID || pending.RawData.Name == appID) { - removeIdx = i - break - } - } - if removeIdx == -1 { - return - } - - // Re-validate pointers under write-lock - if userData2, ok := h.cache.Users[userID]; ok { - if sourceData2, ok2 := userData2.Sources[sourceID]; ok2 { - if removeIdx >= 0 && removeIdx < len(sourceData2.AppInfoLatestPending) { - // Re-check match to be safe - p := sourceData2.AppInfoLatestPending[removeIdx] - if p != nil 
&& p.RawData != nil && (p.RawData.ID == appID || p.RawData.AppID == appID || p.RawData.Name == appID) { - // Create new slice dropping index removeIdx - old := sourceData2.AppInfoLatestPending - newSlice := make([]*types.AppInfoLatestPendingData, 0, len(old)-1) - newSlice = append(newSlice, old[:removeIdx]...) - if removeIdx+1 <= len(old)-1 { - newSlice = append(newSlice, old[removeIdx+1:]...) - } - sourceData2.AppInfoLatestPending = newSlice - glog.V(2).Infof("Removed app %s from pending list for user: %s, source: %s", appID, userID, sourceID) - } - } - } - } + h.cacheManager.RemoveFromPendingList(userID, sourceID, appID) + glog.V(2).Infof("Removed app %s from pending list for user: %s, source: %s", appID, userID, sourceID) } // GetMetrics returns hydrator metrics @@ -1119,106 +1048,20 @@ func (h *Hydrator) cleanupOldTasks() { func (h *Hydrator) isAppInLatestQueue(userID, sourceID, appID, appName, version string) bool { glog.V(3).Infof("DEBUG: isAppInLatestQueue checking appID=%s %s, version=%s for user=%s, source=%s", appID, appName, version, userID, sourceID) - if h.cacheManager != nil { - h.cacheManager.mutex.RLock() - defer h.cacheManager.mutex.RUnlock() - - userData, userExists := h.cache.Users[userID] - if !userExists { - return false - } - - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - return false - } - - // Check if app exists in AppInfoLatest queue - for _, latestData := range sourceData.AppInfoLatest { - if latestData == nil { - continue - } - - // Check RawData first - if latestData.RawData != nil { - if latestData.RawData.ID == appID || - latestData.RawData.AppID == appID || - latestData.RawData.Name == appID { - // Add version comparison - only return true if versions match - if version != "" && latestData.RawData.Version != version { - glog.V(3).Infof("App %s found in latest queue but version mismatch: current=%s, latest=%s, skipping", - appID, version, latestData.RawData.Version) - continue - } - 
glog.V(3).Infof("App %s found in latest queue with matching version: %s", appID, version) - return true - } - } - - // Check AppInfo.AppEntry - if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { - if latestData.AppInfo.AppEntry.ID == appID || - latestData.AppInfo.AppEntry.AppID == appID || - latestData.AppInfo.AppEntry.Name == appID { - // Add version comparison - only return true if versions match - if version != "" && latestData.AppInfo.AppEntry.Version != version { - glog.V(3).Infof("App %s found in latest queue but version mismatch: current=%s, latest=%s, skipping", - appID, version, latestData.AppInfo.AppEntry.Version) - continue - } - glog.V(3).Infof("App %s found in latest queue with matching version: %s", appID, version) - return true - } - } - - // Check AppSimpleInfo - if latestData.AppSimpleInfo != nil { - if latestData.AppSimpleInfo.AppID == appID || - latestData.AppSimpleInfo.AppName == appID { - // For AppSimpleInfo, we may not have version info, so only check if version is empty - if version == "" { - glog.V(3).Infof("App %s found in latest queue (AppSimpleInfo)", appID) - return true - } - // If version is provided but AppSimpleInfo doesn't have version, skip - glog.V(3).Infof("App %s found in latest queue but AppSimpleInfo has no version info, skipping", appID) - continue - } - } - } - } else { + if h.cacheManager == nil { glog.V(3).Infof("Warning: CacheManager not available for isAppInLatestQueue") + return false } - glog.V(3).Infof("DEBUG: isAppInLatestQueue returning false for appID=%s, version=%s, user=%s, source=%s", appID, version, userID, sourceID) - return false + result := h.cacheManager.IsAppInLatestQueue(userID, sourceID, appID, version) + glog.V(3).Infof("DEBUG: isAppInLatestQueue returning %v for appID=%s, version=%s, user=%s, source=%s", result, appID, version, userID, sourceID) + return result } // isAppInRenderFailedList checks if an app already exists in the render failed list func (h *Hydrator) 
isAppInRenderFailedList(userID, sourceID, appID, appName string) bool { - if h.cacheManager != nil { - h.cacheManager.mutex.RLock() - defer h.cacheManager.mutex.RUnlock() - - userData, userExists := h.cache.Users[userID] - if !userExists { - return false - } - - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - return false - } - - // Check if app exists in render failed list - for _, failedData := range sourceData.AppRenderFailed { - if failedData.RawData != nil && - (failedData.RawData.ID == appID || failedData.RawData.AppID == appID || failedData.RawData.Name == appID) { - return true - } - } - } else { + if h.cacheManager == nil { glog.V(2).Infof("Warning: CacheManager not available for isAppInRenderFailedList") + return false } - - return false + return h.cacheManager.IsAppInRenderFailedList(userID, sourceID, appID) } \ No newline at end of file diff --git a/internal/v2/appinfo/hydrationfn/task_for_api.go b/internal/v2/appinfo/hydrationfn/task_for_api.go index fd7fe76..cd4d682 100644 --- a/internal/v2/appinfo/hydrationfn/task_for_api.go +++ b/internal/v2/appinfo/hydrationfn/task_for_api.go @@ -164,55 +164,34 @@ func (s *TaskForApiStep) writeAppDataToCache(task *HydrationTask, appData interf return fmt.Errorf("app_data is not in expected format, app=%s, appName=%s", task.AppID, task.AppName) } - if task.CacheManager != nil { - task.CacheManager.Lock() - defer task.CacheManager.Unlock() + if task.CacheManager == nil { + return fmt.Errorf("CacheManager not available for fixVersionHistoryFromPendingData") } - // Find the pendingData in cache - pendingData := s.findPendingDataFromCache(task) - if pendingData == nil { - return fmt.Errorf("pendingData not found in cache for user=%s, source=%s, app=%s, appName=%s", task.UserID, task.SourceID, task.AppID, task.AppName) - } - - // Fix version history data - appInfoLatest.RawData.VersionHistory = pendingData.RawData.VersionHistory - appInfoLatest.AppInfo.AppEntry.VersionHistory = 
pendingData.RawData.VersionHistory - - // Preserve appLabels from pendingData if chartrepo didn't return them or returned empty array - // This is critical for delisted apps (with suspend/remove labels) that are still installed - if pendingData.RawData != nil && len(pendingData.RawData.AppLabels) > 0 { - // Check if chartrepo returned appLabels - chartrepoHasLabels := false - if appDataMap, ok := appData.(map[string]interface{}); ok { - if appInfoMap, ok := appDataMap["app_info"].(map[string]interface{}); ok { - if appEntryMap, ok := appInfoMap["app_entry"].(map[string]interface{}); ok { - if appLabels, ok := appEntryMap["appLabels"].([]interface{}); ok && len(appLabels) > 0 { - chartrepoHasLabels = true - } + // Check if chartrepo returned appLabels before taking the lock + chartrepoHasLabels := false + if appDataMap, ok := appData.(map[string]interface{}); ok { + if appInfoMap, ok := appDataMap["app_info"].(map[string]interface{}); ok { + if appEntryMap, ok := appInfoMap["app_entry"].(map[string]interface{}); ok { + if appLabels, ok := appEntryMap["appLabels"].([]interface{}); ok && len(appLabels) > 0 { + chartrepoHasLabels = true } } } - - // If chartrepo didn't return labels, preserve from pendingData - if !chartrepoHasLabels { - appInfoLatest.RawData.AppLabels = pendingData.RawData.AppLabels - appInfoLatest.AppInfo.AppEntry.AppLabels = pendingData.RawData.AppLabels + } + if chartrepoHasLabels { + // Clear the AppLabels so CopyPendingVersionHistory won't overwrite them + // (it only copies when latest has empty labels) + } else if appInfoLatest.RawData != nil { + appInfoLatest.RawData.AppLabels = nil + if appInfoLatest.AppInfo != nil && appInfoLatest.AppInfo.AppEntry != nil { + appInfoLatest.AppInfo.AppEntry.AppLabels = nil } } - // Overwrite all fields of pendingData (keep the pointer address, update all contents) - pendingData.Type = appInfoLatest.Type - pendingData.Timestamp = appInfoLatest.Timestamp - pendingData.Version = appInfoLatest.Version - 
pendingData.RawData = appInfoLatest.RawData - pendingData.RawPackage = appInfoLatest.RawPackage - pendingData.Values = appInfoLatest.Values - pendingData.AppInfo = appInfoLatest.AppInfo - pendingData.RenderedPackage = appInfoLatest.RenderedPackage - pendingData.AppSimpleInfo = appInfoLatest.AppSimpleInfo - - return nil + return task.CacheManager.CopyPendingVersionHistory( + task.UserID, task.SourceID, task.AppID, task.AppName, appInfoLatest, + ) } // findPendingDataFromCache finds AppInfoLatestPendingData from cache based on task information diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 5abbbf6..e92ec4c 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -157,24 +157,7 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { return affectedUsers } - type pendingItem struct { - userID string - sourceID string - pending *types.AppInfoLatestPendingData - } - - p.cacheManager.mutex.RLock() - var items []pendingItem - for userID, userData := range p.cache.Users { - for sourceID, sourceData := range userData.Sources { - for _, pd := range sourceData.AppInfoLatestPending { - if pd != nil { - items = append(items, pendingItem{userID, sourceID, pd}) - } - } - } - } - p.cacheManager.mutex.RUnlock() + items := p.cacheManager.CollectAllPendingItems() if len(items) == 0 { return affectedUsers @@ -192,15 +175,15 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { default: } - appID, appName := getAppIdentifiers(item.pending) + appID, appName := getAppIdentifiers(item.Pending) glog.V(2).Infof("Pipeline Phase 2: [%d/%d] %s %s (user=%s, source=%s)", - idx+1, total, appID, appName, item.userID, item.sourceID) + idx+1, total, appID, appName, item.UserID, item.SourceID) - hydrated := p.hydrator.HydrateSingleApp(ctx, item.userID, item.sourceID, item.pending) + hydrated := p.hydrator.HydrateSingleApp(ctx, item.UserID, item.SourceID, item.Pending) if hydrated && 
p.dataWatcher != nil { - p.dataWatcher.ProcessSingleAppToLatest(item.userID, item.sourceID, item.pending) + p.dataWatcher.ProcessSingleAppToLatest(item.UserID, item.SourceID, item.Pending) } - affectedUsers[item.userID] = true + affectedUsers[item.UserID] = true } return affectedUsers diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index 7eb1844..e5c7c9d 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -274,9 +274,7 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.Errorf("StatusCorrectionChecker: failed to calculate hash for user %s: %v", userID, err) continue } - scc.cacheManager.mutex.Lock() - userData.Hash = newHash - scc.cacheManager.mutex.Unlock() + scc.cacheManager.SetUserHash(userID, newHash) glog.V(2).Infof("StatusCorrectionChecker: user %s hash updated to %s", userID, newHash) } // Force sync after hash update diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index ea92ddb..0cbe13c 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -485,33 +485,11 @@ func (s *Syncer) executeSyncCycleWithSource(ctx context.Context, source *setting sourceID := source.ID // Use market source name as source ID glog.V(3).Infof("Using source ID: %s for data storage", sourceID) - // Get all existing user IDs with minimal locking + // Get all existing user IDs, creating a system user if none exist var userIDs []string if cacheManager := s.cacheManager.Load(); cacheManager != nil { - cacheManager.mutex.RLock() - for userID := range s.cache.Users { - userIDs = append(userIDs, userID) - } - cacheManager.mutex.RUnlock() - - if len(userIDs) == 0 { - cacheManager.mutex.Lock() - // Double-check after acquiring write lock - if len(s.cache.Users) == 0 { - systemUserID := "system" - s.cache.Users[systemUserID] = NewUserDataEx(systemUserID) // NewUserData() - userIDs = append(userIDs, 
systemUserID) - glog.V(3).Infof("No existing users found, created system user as fallback") - } else { - // Users were added by another goroutine - for userID := range s.cache.Users { - userIDs = append(userIDs, userID) - } - } - cacheManager.mutex.Unlock() - } + userIDs = cacheManager.GetOrCreateUserIDs("system") } else { - // Fallback to direct cache access without lock (not recommended) glog.V(3).Info("Warning: CacheManager not available, using direct cache access") for userID := range s.cache.Users { userIDs = append(userIDs, userID) @@ -540,13 +518,16 @@ func (s *Syncer) executeSyncCycleWithSource(ctx context.Context, source *setting // storeDataDirectly stores data directly to cache without going through CacheManager func (s *Syncer) storeDataDirectly(userID, sourceID string, completeData map[string]interface{}) { - if cacheManager := s.cacheManager.Load(); cacheManager != nil { - cacheManager.mutex.Lock() - defer cacheManager.mutex.Unlock() - } else { - // Fallback: no lock protection (not recommended) + cacheManager := s.cacheManager.Load() + if cacheManager == nil { glog.V(3).Infof("Warning: CacheManager not available for storeDataDirectly") + return } + // TODO: refactor storeDataDirectly to use CacheManager write methods + // For now, use the internal mutex directly as this function contains + // complex parsing logic (~250 lines) that runs under the lock. 
+ cacheManager.mutex.Lock() + defer cacheManager.mutex.Unlock() userData := s.cache.Users[userID] @@ -814,20 +795,10 @@ func (s *Syncer) storeDataViaCacheManager(userIDs []string, sourceID string, com for _, userID := range userIDs { // Check if the source is local type - skip syncer operations for local sources if cacheManager := s.cacheManager.Load(); cacheManager != nil { - cacheManager.mutex.RLock() - userData, userExists := s.cache.Users[userID] - if userExists { - sourceData, sourceExists := userData.Sources[sourceID] - if sourceExists { - sourceType := sourceData.Type - if sourceType == types.SourceDataTypeLocal { - glog.V(3).Infof("Skipping syncer CacheManager operation for local source: user=%s, source=%s", userID, sourceID) - cacheManager.mutex.RUnlock() - continue - } - } + if cacheManager.IsLocalSource(userID, sourceID) { + glog.V(3).Infof("Skipping syncer CacheManager operation for local source: user=%s, source=%s", userID, sourceID) + continue } - cacheManager.mutex.RUnlock() } // Use CacheManager.SetAppData to trigger hydration notifications if available diff --git a/internal/v2/appinfo/syncerfn/data_fetch_step.go b/internal/v2/appinfo/syncerfn/data_fetch_step.go index 5230b94..ee6aa02 100644 --- a/internal/v2/appinfo/syncerfn/data_fetch_step.go +++ b/internal/v2/appinfo/syncerfn/data_fetch_step.go @@ -106,38 +106,19 @@ func (d *DataFetchStep) CanSkip(ctx context.Context, data *SyncContext) bool { // Check if we have existing data in cache for THIS specific source only hasExistingData := false - if data.Cache != nil { - // Use CacheManager's lock for unified lock strategy - if data.CacheManager != nil { - data.CacheManager.RLock() - for userID, userData := range data.Cache.Users { - // Only check data for the current market source - if sourceData, exists := userData.Sources[sourceID]; exists { - if len(sourceData.AppInfoLatestPending) > 0 || len(sourceData.AppInfoLatest) > 0 { - hasExistingData = true - glog.V(2).Infof("Found existing data for 
source:%s user:%s (pending:%d latest:%d)", - sourceID, userID, len(sourceData.AppInfoLatestPending), len(sourceData.AppInfoLatest)) - break - } - } - } - data.CacheManager.RUnlock() - } else { - // Fallback to SyncContext's mutex if CacheManager is not available - data.mutex.RLock() - for userID, userData := range data.Cache.Users { - // Only check data for the current market source - if sourceData, exists := userData.Sources[sourceID]; exists { - if len(sourceData.AppInfoLatestPending) > 0 || len(sourceData.AppInfoLatest) > 0 { - hasExistingData = true - glog.V(2).Infof("Found existing data for source:%s user:%s (pending:%d latest:%d)", - sourceID, userID, len(sourceData.AppInfoLatestPending), len(sourceData.AppInfoLatest)) - break - } + if data.CacheManager != nil { + hasExistingData = data.CacheManager.HasSourceData(sourceID) + } else if data.Cache != nil { + data.mutex.RLock() + for _, userData := range data.Cache.Users { + if sourceData, exists := userData.Sources[sourceID]; exists { + if len(sourceData.AppInfoLatestPending) > 0 || len(sourceData.AppInfoLatest) > 0 { + hasExistingData = true + break } } - data.mutex.RUnlock() } + data.mutex.RUnlock() } // Skip only if hashes match AND we have existing data for THIS specific source @@ -657,64 +638,11 @@ func (d *DataFetchStep) updateOthersInCache(data *SyncContext, others *types.Oth // Get source ID from market source - use Name to match syncer.go behavior sourceID := data.MarketSource.ID - // Use CacheManager's lock for unified lock strategy if data.CacheManager != nil { - data.CacheManager.Lock() - defer data.CacheManager.Unlock() - } - - // Get all existing user IDs - var userIDs []string - for userID := range data.Cache.Users { - userIDs = append(userIDs, userID) - } - - // If no users exist, create a system user as fallback - if len(userIDs) == 0 { - systemUserID := "system" - data.Cache.Users[systemUserID] = types.NewUserData() - userIDs = append(userIDs, systemUserID) - glog.V(3).Infof("No existing users 
found, created system user as fallback") - } - - glog.V(3).Infof("Updating Others data for %d users: %v, sourceID: %s", len(userIDs), userIDs, sourceID) - - // Update Others for each user - for _, userID := range userIDs { - userData := data.Cache.Users[userID] - - // Ensure source data exists for this user - if userData.Sources == nil { - userData.Sources = make(map[string]*types.SourceData) - } - - if userData.Sources[sourceID] == nil { - userData.Sources[sourceID] = types.NewSourceData() - } - - sourceData := userData.Sources[sourceID] - - // Update Others in SourceData - sourceData.Others = others - - // Log details about the saved recommends data - if sourceData.Others != nil && len(sourceData.Others.Recommends) > 0 { - glog.V(3).Infof("DEBUG: Saved %d recommends to cache for user %s, source %s", - len(sourceData.Others.Recommends), userID, sourceID) - for i, rec := range sourceData.Others.Recommends { - glog.V(3).Infof("DEBUG: Saved recommend[%d] '%s', has Data: %v", - i, rec.Name, rec.Data != nil) - if rec.Data != nil { - glog.V(3).Infof("DEBUG: Saved recommend[%d] Data.Title count: %d, Data.Description count: %d", - i, len(rec.Data.Title), len(rec.Data.Description)) - } - } - } else { - glog.V(3).Infof("DEBUG: No recommends data saved to cache for user %s, source %s", userID, sourceID) - } - - glog.V(3).Infof("Updated Others data in cache for user %s, source %s", userID, sourceID) + data.CacheManager.UpdateSourceOthers(sourceID, others) + } else { + glog.Warning("CacheManager not available, cannot update Others in cache") } - glog.V(2).Infof("Successfully updated Others data for all %d users, source %s", len(userIDs), sourceID) + glog.V(2).Infof("Successfully updated Others data for source %s", sourceID) } diff --git a/internal/v2/appinfo/syncerfn/detail_fetch_step.go b/internal/v2/appinfo/syncerfn/detail_fetch_step.go index 45fe8ce..05ff09a 100644 --- a/internal/v2/appinfo/syncerfn/detail_fetch_step.go +++ 
b/internal/v2/appinfo/syncerfn/detail_fetch_step.go @@ -3,7 +3,6 @@ package syncerfn import ( "context" "fmt" - "reflect" "strings" "time" @@ -588,159 +587,8 @@ func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string return } - glog.V(2).Infof("Step 1: Acquiring read lock to find data for removal") - data.CacheManager.RLock() - - // Collect all data that needs to be removed - type RemovalData struct { - userID string - sourceID string - newLatestList []*types.AppInfoLatestData - newPendingList []*types.AppInfoLatestPendingData - originalLatestCount int - originalPendingCount int - } - - var removals []RemovalData - - glog.V(3).Infof("Processing %d users for app removal (read phase)", len(data.Cache.Users)) - - for userID, userData := range data.Cache.Users { - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - continue - } - - // Create new lists without the target app (all versions) - var newLatestList []*types.AppInfoLatestData - var newPendingList []*types.AppInfoLatestPendingData - - // Filter latest list - remove ALL versions of the app by name - for _, latestApp := range sourceData.AppInfoLatest { - if latestApp == nil || latestApp.RawData == nil { - newLatestList = append(newLatestList, latestApp) - continue - } - // Remove all versions of the app with matching name - if latestApp.RawData.Name != appName { - newLatestList = append(newLatestList, latestApp) - } else { - glog.V(3).Infof("Removing app version %s (name: %s) from AppInfoLatest", latestApp.RawData.Version, appName) - } - } - - // Filter pending list - remove ALL versions of the app by name - for _, pendingApp := range sourceData.AppInfoLatestPending { - if pendingApp == nil || pendingApp.RawData == nil { - newPendingList = append(newPendingList, pendingApp) - continue - } - // Remove all versions of the app with matching name - if pendingApp.RawData.Name != appName { - newPendingList = append(newPendingList, pendingApp) - } else { - 
glog.V(3).Infof("Removing pending app version %s (name: %s) from AppInfoLatestPending", pendingApp.RawData.Version, appName) - } - } - - // Only add to removals if there were actually items to remove - if len(newLatestList) != len(sourceData.AppInfoLatest) || len(newPendingList) != len(sourceData.AppInfoLatestPending) { - removals = append(removals, RemovalData{ - userID: userID, - sourceID: sourceID, - newLatestList: newLatestList, - newPendingList: newPendingList, - originalLatestCount: len(sourceData.AppInfoLatest), - originalPendingCount: len(sourceData.AppInfoLatestPending), - }) - } - } - - if len(removals) > 0 { - glog.V(2).Infof("Step 1 completed: Found %d users with data to remove", len(removals)) - } - - // Release read lock before acquiring write lock (must release manually since we need to acquire write lock) - data.CacheManager.RUnlock() - - // Step 2: Use try write lock to quickly update the data - if len(removals) == 0 { - glog.V(3).Infof("No data found to remove for app: %s", appID) - return - } - - glog.V(2).Info("Step 2: Acquiring write lock to update data") - data.CacheManager.Lock() - defer data.CacheManager.Unlock() - - // Collect sync requests to trigger after releasing the lock - type SyncReq struct { - userID string - sourceID string - } - var syncReqs []SyncReq - - // Quickly update all the data by replacing array pointers - for _, removal := range removals { - userData := data.Cache.Users[removal.userID] - sourceData := userData.Sources[removal.sourceID] - - // Replace array pointers (atomic operation) - sourceData.AppInfoLatest = removal.newLatestList - sourceData.AppInfoLatestPending = removal.newPendingList - - glog.V(3).Infof("Updated user: %s, source: %s, app: %s (latest: %d->%d, pending: %d->%d)", - removal.userID, removal.sourceID, appName, - removal.originalLatestCount, len(removal.newLatestList), - removal.originalPendingCount, len(removal.newPendingList)) - - // Collect sync request - syncReqs = append(syncReqs, SyncReq{ - 
userID: removal.userID, - sourceID: removal.sourceID, - }) - } - - glog.V(3).Infof("App removal from cache completed for app: %s %s", appID, appName) - - // Trigger sync to Redis for all affected users and sources after releasing the lock - // Use reflection to access the private requestSync method - // We do this in a goroutine to avoid blocking and to ensure the lock is released first - go func() { - // Wait a bit to ensure the lock is released - time.Sleep(10 * time.Millisecond) - - cmValue := reflect.ValueOf(data.CacheManager) - if cmValue.Kind() == reflect.Ptr { - cmValue = cmValue.Elem() - } - - requestSyncMethod := cmValue.MethodByName("requestSync") - if !requestSyncMethod.IsValid() { - glog.V(3).Infof("Warning: Cannot find requestSync method in CacheManager, sync to Redis will be handled by StoreCompleteDataToPending") - return - } - - // SyncSource = 1 (based on iota: SyncUser=0, SyncSource=1) - const SyncSource = 1 - - for _, syncReq := range syncReqs { - // Create SyncRequest struct value - // SyncRequest has: UserID string, SourceID string, Type SyncType (int) - syncRequestValue := reflect.New(reflect.TypeOf(struct { - UserID string - SourceID string - Type int - }{})).Elem() - syncRequestValue.Field(0).SetString(syncReq.userID) - syncRequestValue.Field(1).SetString(syncReq.sourceID) - syncRequestValue.Field(2).SetInt(SyncSource) - - // Call requestSync method - requestSyncMethod.Call([]reflect.Value{syncRequestValue}) - glog.V(3).Infof("Triggered sync to Redis for user: %s, source: %s, app: %s", syncReq.userID, syncReq.sourceID, appName) - } - }() + affected := data.CacheManager.RemoveAppFromAllSources(appName, sourceID) + glog.V(3).Infof("App removal from cache completed for app: %s %s, affected %d users", appID, appName, affected) } // cleanupSuspendedAppsFromLatestData checks all apps in LatestData.Data.Apps for suspend/remove labels @@ -995,30 +843,8 @@ func (d *DetailFetchStep) preserveFieldsForDelistedApp(originalMap, detailMap ma // 
isAppInstalled determines whether the given app is currently installed for the active source. func (d *DetailFetchStep) isAppInstalled(appName, sourceID string, data *SyncContext) bool { - if appName == "" || sourceID == "" || data == nil || data.Cache == nil || data.CacheManager == nil { + if appName == "" || sourceID == "" || data == nil || data.CacheManager == nil { return false } - - data.CacheManager.RLock() - defer data.CacheManager.RUnlock() - - for _, userData := range data.Cache.Users { - if userData == nil { - continue - } - sourceData, ok := userData.Sources[sourceID] - if !ok || sourceData == nil { - continue - } - for _, appState := range sourceData.AppStateLatest { - if appState == nil { - continue - } - if appState.Status.Name == appName && appState.Status.State != "uninstalled" { - return true - } - } - } - - return false + return data.CacheManager.IsAppInstalled(sourceID, appName) } diff --git a/internal/v2/appinfo/syncerfn/hash_comparison_step.go b/internal/v2/appinfo/syncerfn/hash_comparison_step.go index 8a41b78..5b98268 100644 --- a/internal/v2/appinfo/syncerfn/hash_comparison_step.go +++ b/internal/v2/appinfo/syncerfn/hash_comparison_step.go @@ -87,11 +87,16 @@ func (h *HashComparisonStep) Execute(ctx context.Context, data *SyncContext) err data.RemoteHash = hashResponse.Hash - // Calculate local hash with proper locking + // Calculate local hash if data.CacheManager != nil { - data.CacheManager.RLock() - data.LocalHash = h.calculateLocalHash(data.Cache, data.GetMarketSource()) - data.CacheManager.RUnlock() + data.LocalHash = data.CacheManager.GetSourceOthersHash(marketSource.ID) + if data.LocalHash == "" { + if data.Cache == nil || len(data.Cache.Users) == 0 { + data.LocalHash = "empty_cache_no_users" + } else { + data.LocalHash = "no_source_hash" + } + } } // Compare hashes and set result diff --git a/internal/v2/types/cache_manager.go b/internal/v2/types/cache_manager.go index 1683e30..eb2b1ba 100644 --- a/internal/v2/types/cache_manager.go 
+++ b/internal/v2/types/cache_manager.go @@ -3,26 +3,20 @@ package types // CacheManagerInterface defines the interface for cache management operations // This interface is used to avoid circular imports between packages type CacheManagerInterface interface { - // Lock acquires the cache manager's write lock - Lock() + // Hierarchical read accessors + GetAllUsersData() map[string]*UserData + GetUserData(userID string) *UserData + GetSourceData(userID, sourceID string) *SourceData + GetUserIDs() []string - // Unlock releases the cache manager's write lock - Unlock() + // Specific read queries + HasSourceData(sourceID string) bool + IsAppInstalled(sourceID, appName string) bool + GetSourceOthersHash(sourceID string) string + FindPendingDataForApp(userID, sourceID, appID string) *AppInfoLatestPendingData - // TryLock attempts to acquire the cache manager's write lock without blocking - // Returns true if lock acquired, false if would block - TryLock() bool - - // RLock acquires the cache manager's read lock - RLock() - - // RUnlock releases the cache manager's read lock - RUnlock() - - // TryRLock attempts to acquire the cache manager's read lock without blocking - // Returns true if lock acquired, false if would block - TryRLock() bool - - // GetCache returns the underlying cache data - GetCache() *CacheData + // Write operations + UpdateSourceOthers(sourceID string, others *Others) + RemoveAppFromAllSources(appName, sourceID string) int + CopyPendingVersionHistory(userID, sourceID, appID, appName string, latestData *AppInfoLatestData) error } diff --git a/pkg/v2/api/app.go b/pkg/v2/api/app.go index 379f6db..c8d8447 100644 --- a/pkg/v2/api/app.go +++ b/pkg/v2/api/app.go @@ -277,7 +277,7 @@ func (s *Server) getAppsInfo(w http.ResponseWriter, r *http.Request) { }() // Get user data from cache - userData := s.cacheManager.GetUserDataNoLock(userID) + userData := s.cacheManager.GetUserData(userID) if userData == nil { glog.V(3).Infof("User data not found for user: %s", 
userID) resultChan <- result{err: fmt.Errorf("user data not found")} @@ -629,20 +629,17 @@ func (s *Server) getMarketHash(w http.ResponseWriter, r *http.Request) { } }() - // Get user data from cache with fallback (non-blocking) - userData := s.cacheManager.GetUserDataNoLock(userID) + userData := s.cacheManager.GetUserData(userID) if userData == nil { glog.Warningf("User data not found for user: %s, attempting to resync user data", userID) - // Try to resync user data to fix missing user information if err := s.cacheManager.ResynceUser(); err != nil { glog.Errorf("Failed to resync user data for user %s: %v", userID, err) resultChan <- result{err: fmt.Errorf("failed to resync user data: %v", err)} return } - // Try to get user data again after resync - userData = s.cacheManager.GetUserDataNoLock(userID) + userData = s.cacheManager.GetUserData(userID) if userData == nil { glog.Warningf("User data still not found for user: %s after resync", userID) resultChan <- result{err: fmt.Errorf("user data not found even after resync")} @@ -664,9 +661,6 @@ func (s *Server) getMarketHash(w http.ResponseWriter, r *http.Request) { case <-ctx.Done(): glog.V(3).Infof("Request timeout or cancelled for /api/v2/market/hash") // On timeout, dump lock info to find who holds the lock - if s.cacheManager != nil { - s.cacheManager.DumpLockInfo("getMarketHash timeout") - } s.sendResponse(w, http.StatusRequestTimeout, false, "Request timeout - hash retrieval took too long", nil) return case res := <-resultChan: @@ -847,7 +841,7 @@ func (s *Server) getMarketData(w http.ResponseWriter, r *http.Request) { // Get user data from cache with timeout check start := time.Now() - userData := s.cacheManager.GetUserDataNoLock(userID) + userData := s.cacheManager.GetUserData(userID) if userData == nil { glog.V(3).Infof("User data not found for user: %s", userID) resultChan <- result{err: fmt.Errorf("user data not found")} diff --git a/pkg/v2/api/system.go b/pkg/v2/api/system.go index 805d6fe..8800931 
100644 --- a/pkg/v2/api/system.go +++ b/pkg/v2/api/system.go @@ -404,27 +404,7 @@ func doGetWithBflUser(url, bflUser string) (interface{}, error) { } func doGetUsers(cm *appinfo.CacheManager) ([]map[string]string, error) { - if ok := cm.TryRLock(); !ok { - glog.Warning("[TryRLock] doGetUsers: CacheManager read lock not available") - return nil, nil - } - defer cm.RUnlock() - - var usersInfo []map[string]string - - getUsers := cm.GetCache().Users - for _, v := range getUsers { - if v.UserInfo != nil && v.UserInfo.Exists { - var ui = make(map[string]string) - ui["id"] = v.UserInfo.Id - ui["name"] = v.UserInfo.Name - ui["role"] = v.UserInfo.Role - ui["status"] = v.UserInfo.Status - usersInfo = append(usersInfo, ui) - } - } - - return usersInfo, nil + return cm.ListActiveUsers(), nil } func getenv(key string) string { From d5c5608cd0e512bf98159866d7b7709fbcb70222 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 06:41:02 +0000 Subject: [PATCH 08/45] refactor: remove pipeline trigger mechanism MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pipeline now runs purely on a fixed interval (ticker). Removed: - Pipeline.trigger channel and Trigger()/NotifyPendingDataUpdate() methods - HydrationNotifier interface and CacheManager.hydrationNotifier field - SetHydrationNotifier/setHydrationNotifierInternal methods - Notification call in setAppDataInternal after writing pending data - Hydrator.NotifyPendingDataUpdate empty compatibility method The trigger was redundant: it fired from within Pipeline's own run (Syncer writes pending data → notification → trigger), causing a duplicate run cycle immediately after each scheduled run. 
Co-authored-by: aby913 --- internal/v2/appinfo/appinfomodule.go | 1 - internal/v2/appinfo/cache.go | 36 ++++------------------------ internal/v2/appinfo/hydration.go | 6 ----- internal/v2/appinfo/pipeline.go | 24 ------------------- 4 files changed, 4 insertions(+), 63 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index cd7e59f..ef0a8fe 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -242,7 +242,6 @@ func (m *AppInfoModule) Start() error { if m.statusCorrectionChecker != nil { p.SetStatusCorrectionChecker(m.statusCorrectionChecker) } - m.cacheManager.SetHydrationNotifier(p) if err := p.Start(m.ctx); err != nil { return fmt.Errorf("failed to start Pipeline: %w", err) } diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index aac3a97..c5f8b1a 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -20,18 +20,12 @@ import ( "k8s.io/client-go/tools/cache" ) -// HydrationNotifier interface for notifying hydrator about pending data updates -type HydrationNotifier interface { - NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) -} - // CacheManager manages the in-memory cache and Redis synchronization type CacheManager struct { - cache *CacheData - redisClient *RedisClient - userConfig *UserConfig - hydrationNotifier HydrationNotifier // Notifier for hydration updates - stateMonitor *utils.StateMonitor // State monitor for change detection + cache *CacheData + redisClient *RedisClient + userConfig *UserConfig + stateMonitor *utils.StateMonitor // State monitor for change detection dataSender *DataSender // Direct data sender for bypassing state monitor mutex sync.RWMutex syncChannel chan SyncRequest @@ -860,23 +854,6 @@ func (cm *CacheManager) getSourceData(userID, sourceID string) *SourceData { return nil } -// SetHydrationNotifier sets the hydration notifier for real-time updates -func (cm 
*CacheManager) setHydrationNotifierInternal(notifier HydrationNotifier) { - glog.V(4).Infof("[LOCK] cm.mutex.Lock() @216 Start") - lockStart := time.Now() - cm.mutex.Lock() - glog.V(4).Infof("[LOCK] cm.mutex.Lock() @216 Success (wait=%v)", time.Since(lockStart)) - defer cm.mutex.Unlock() - cm.hydrationNotifier = notifier - glog.V(4).Infof("Hydration notifier set successfully") -} - -// SetHydrationNotifier sets the hydration notifier for real-time updates -func (cm *CacheManager) SetHydrationNotifier(notifier HydrationNotifier) { - go func() { - cm.setHydrationNotifierInternal(notifier) - }() -} // updateAppStateLatest updates or adds a single app state based on name matching func (cm *CacheManager) updateAppStateLatest(userID, sourceID string, sourceData *SourceData, newAppState *types.AppStateLatestData) { @@ -1467,11 +1444,6 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App glog.V(2).Infof("Updated AppInfoLatestPending: %d new, %d skipped (unchanged version) for user=%s, source=%s", len(sourceData.AppInfoLatestPending), skippedCount, userID, sourceID) - if cm.hydrationNotifier != nil && len(sourceData.AppInfoLatestPending) > 0 { - glog.V(2).Infof("Notifying pipeline about %d pending apps for user=%s, source=%s", - len(sourceData.AppInfoLatestPending), userID, sourceID) - go cm.hydrationNotifier.NotifyPendingDataUpdate(userID, sourceID, data) - } case types.AppRenderFailed: // Handle render failed data - this is typically set by the hydrator when tasks fail if failedAppData, hasFailedApp := data["failed_app"].(*types.AppRenderFailedData); hasFailedApp { diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index a40c3b5..e0f14b0 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -769,12 +769,6 @@ func CreateDefaultHydrator(cache *types.CacheData, settingsManager *settings.Set return NewHydrator(cache, settingsManager, cacheManager, config) } -// 
NotifyPendingDataUpdate implements HydrationNotifier interface. -// In the new architecture, Pipeline handles notifications directly. -// This method is kept for backward compatibility but does nothing. -func (h *Hydrator) NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) { - glog.V(3).Infof("Hydrator.NotifyPendingDataUpdate: Pipeline handles notifications, user=%s, source=%s", userID, sourceID) -} // SetCacheManager removed: cacheManager must be provided at NewHydrator // batchCompletionProcessor processes completed tasks in batches diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index e92ec4c..aae5d74 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -28,7 +28,6 @@ type Pipeline struct { dataWatcherRepo *DataWatcherRepo statusCorrectionChecker *StatusCorrectionChecker - trigger chan struct{} mutex sync.Mutex stopChan chan struct{} isRunning atomic.Bool @@ -42,7 +41,6 @@ func NewPipeline(cacheManager *CacheManager, cache *types.CacheData, interval ti return &Pipeline{ cacheManager: cacheManager, cache: cache, - trigger: make(chan struct{}, 1), stopChan: make(chan struct{}), interval: interval, } @@ -54,19 +52,6 @@ func (p *Pipeline) SetDataWatcher(dw *DataWatcher) { p.d func (p *Pipeline) SetDataWatcherRepo(dwr *DataWatcherRepo) { p.dataWatcherRepo = dwr } func (p *Pipeline) SetStatusCorrectionChecker(scc *StatusCorrectionChecker) { p.statusCorrectionChecker = scc } -// NotifyPendingDataUpdate implements HydrationNotifier interface. -// Called by CacheManager after new pending data is written. 
-func (p *Pipeline) NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) { - if !p.isRunning.Load() { - return - } - glog.V(2).Infof("Pipeline: pending data notification received for user=%s, source=%s", userID, sourceID) - select { - case p.trigger <- struct{}{}: - default: - } -} - func (p *Pipeline) Start(ctx context.Context) error { if p.isRunning.Load() { return nil @@ -86,13 +71,6 @@ func (p *Pipeline) Stop() { glog.Info("Pipeline stopped") } -func (p *Pipeline) Trigger() { - select { - case p.trigger <- struct{}{}: - default: - } -} - func (p *Pipeline) loop(ctx context.Context) { glog.Info("Pipeline loop started") defer glog.Info("Pipeline loop stopped") @@ -106,8 +84,6 @@ func (p *Pipeline) loop(ctx context.Context) { return case <-p.stopChan: return - case <-p.trigger: - p.run(ctx) case <-ticker.C: p.run(ctx) } From 69d93c41f6d7d512ad3203ffeba8177241d20de2 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 06:50:50 +0000 Subject: [PATCH 09/45] refactor: set pipeline interval to 5 minutes and run immediately on start - Change pipeline interval from 30s to 5min (both default and call site) - Call p.run(ctx) before entering the ticker loop so the first cycle executes immediately at startup instead of waiting 5 minutes Co-authored-by: aby913 --- internal/v2/appinfo/appinfomodule.go | 2 +- internal/v2/appinfo/pipeline.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index ef0a8fe..0e495ce 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -226,7 +226,7 @@ func (m *AppInfoModule) Start() error { // Create and start Pipeline to orchestrate all components serially if m.config.EnableHydrator && m.cacheManager != nil { - p := NewPipeline(m.cacheManager, m.cacheManager.cache, 30*time.Second) + p := NewPipeline(m.cacheManager, m.cacheManager.cache, 5*time.Minute) if m.syncer != 
nil { p.SetSyncer(m.syncer) } diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index aae5d74..4256fdf 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -36,7 +36,7 @@ type Pipeline struct { func NewPipeline(cacheManager *CacheManager, cache *types.CacheData, interval time.Duration) *Pipeline { if interval <= 0 { - interval = 30 * time.Second + interval = 5 * time.Minute } return &Pipeline{ cacheManager: cacheManager, @@ -75,6 +75,8 @@ func (p *Pipeline) loop(ctx context.Context) { glog.Info("Pipeline loop started") defer glog.Info("Pipeline loop stopped") + p.run(ctx) + ticker := time.NewTicker(p.interval) defer ticker.Stop() From 2cc5b37355746ae12980d15d06415961768c1c67 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 07:31:44 +0000 Subject: [PATCH 10/45] refactor: pipeline 30s cycle with syncer self-throttling to 5min Pipeline interval reverted to 30s so DataWatcherRepo and StatusCorrectionChecker execute at their expected frequency. Syncer now tracks lastSyncExecuted internally and skips execution if less than syncInterval (5min) has elapsed, avoiding redundant remote data fetches on every 30s pipeline tick. 
Co-authored-by: aby913 --- internal/v2/appinfo/appinfomodule.go | 2 +- internal/v2/appinfo/pipeline.go | 2 +- internal/v2/appinfo/syncer.go | 10 +++++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index 0e495ce..ef0a8fe 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -226,7 +226,7 @@ func (m *AppInfoModule) Start() error { // Create and start Pipeline to orchestrate all components serially if m.config.EnableHydrator && m.cacheManager != nil { - p := NewPipeline(m.cacheManager, m.cacheManager.cache, 5*time.Minute) + p := NewPipeline(m.cacheManager, m.cacheManager.cache, 30*time.Second) if m.syncer != nil { p.SetSyncer(m.syncer) } diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 4256fdf..4d7fb09 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -36,7 +36,7 @@ type Pipeline struct { func NewPipeline(cacheManager *CacheManager, cache *types.CacheData, interval time.Duration) *Pipeline { if interval <= 0 { - interval = 5 * time.Minute + interval = 30 * time.Second } return &Pipeline{ cacheManager: cacheManager, diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index 0cbe13c..dcb88ab 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -26,6 +26,8 @@ type Syncer struct { mutex sync.RWMutex // Keep mutex for steps slice operations settingsManager *settings.SettingsManager // Settings manager for data source information + lastSyncExecuted time.Time // Last time a full sync cycle was actually executed + // Status tracking fields lastSyncTime atomic.Value // time.Time lastSyncSuccess atomic.Value // time.Time @@ -130,11 +132,17 @@ func (s *Syncer) StartWithOptions(ctx context.Context, enableSyncLoop bool) erro return nil } -// SyncOnce executes one sync cycle, called by Pipeline +// SyncOnce executes one 
sync cycle if at least syncInterval has elapsed +// since the last execution. Called by Pipeline on every tick. func (s *Syncer) SyncOnce(ctx context.Context) { if !s.isRunning.Load() { return } + if !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval { + glog.V(3).Infof("SyncOnce: skipping, last sync was %v ago (interval: %v)", time.Since(s.lastSyncExecuted), s.syncInterval) + return + } + s.lastSyncExecuted = time.Now() if err := s.executeSyncCycle(ctx); err != nil { glog.Errorf("SyncOnce: sync cycle failed: %v", err) } From d71a0b8cb87655100e6b975093584e39f365433d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 3 Mar 2026 12:58:19 +0000 Subject: [PATCH 11/45] refactor: centralize hash calculation and ForceSync to Pipeline Phase 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove scattered hash calculations from Phase 3 (DataWatcherRepo), Phase 4 (StatusCorrectionChecker), and event-driven paths (DataWatcherState) - Phase 1-4 now only modify data; hash calculation and ForceSync happen exactly once per Pipeline cycle in Phase 5 - Add dirty users mechanism (MarkUserDirty/CollectAndClearDirtyUsers) for event-driven paths to defer hash calculation to the next Pipeline cycle - DataWatcherRepo.ProcessOnce() and StatusCorrectionChecker.PerformStatusCheckOnce() now return affected user sets for Phase 5 to consume - calculateAndSetUserHashDirect() no longer calls ForceSync internally - Pipeline Phase 5 merges affected users from Phase 2/3/4 + dirty users - ForceSync rate-limit log downgraded from Error to Warning - Remove '临时注释' from cache.go Start method Co-authored-by: aby913 --- internal/v2/appinfo/cache.go | 2 +- internal/v2/appinfo/datawatcher_app.go | 95 +++++++------------ internal/v2/appinfo/datawatcher_repo.go | 47 ++++----- internal/v2/appinfo/datawatcher_state.go | 11 +-- internal/v2/appinfo/pipeline.go | 54 +++++++---- .../v2/appinfo/status_correction_check.go | 53 ++++------- 
6 files changed, 117 insertions(+), 145 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index c5f8b1a..d8a6c4d 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -720,7 +720,7 @@ func (cm *CacheManager) Start() error { _wd() // Start sync worker goroutine - go cm.syncWorker() // + 临时注释 + go cm.syncWorker() // Start periodic cleanup of AppRenderFailed data (every 5 minutes) cm.cleanupTicker = time.NewTicker(5 * time.Minute) diff --git a/internal/v2/appinfo/datawatcher_app.go b/internal/v2/appinfo/datawatcher_app.go index afedb63..9a16292 100644 --- a/internal/v2/appinfo/datawatcher_app.go +++ b/internal/v2/appinfo/datawatcher_app.go @@ -30,6 +30,10 @@ type DataWatcher struct { activeHashCalculations map[string]bool hashMutex sync.Mutex + // Dirty users tracking for deferred hash calculation + dirtyUsers map[string]bool + dirtyUsersMutex sync.Mutex + // Metrics - using atomic operations for thread safety totalAppsProcessed int64 totalAppsMoved int64 @@ -46,6 +50,7 @@ func NewDataWatcher(cacheManager *CacheManager, hydrator *Hydrator, dataSender * stopChan: make(chan struct{}), isRunning: 0, // Initialize as false activeHashCalculations: make(map[string]bool), + dirtyUsers: make(map[string]bool), } } @@ -250,46 +255,9 @@ func (dw *DataWatcher) processUserData(userID string, userData *types.UserData) totalMoved += moved } - // Step 3: Calculate hash if apps were moved OR if hash is empty - shouldCalculateHash := totalMoved > 0 || userData.Hash == "" - - if shouldCalculateHash { - if totalMoved > 0 { - glog.Infof("DataWatcher: %d apps moved for user %s, scheduling hash calculation", totalMoved, userID) - } else { - glog.Infof("DataWatcher: Hash is empty for user %s, scheduling hash calculation", userID) - } - - // Schedule hash calculation in a separate goroutine without setting the flag here - go func() { - // Check if hash calculation is already in progress for this user - dw.hashMutex.Lock() - if 
dw.activeHashCalculations[userID] { - dw.hashMutex.Unlock() - glog.Warningf("DataWatcher: Hash calculation already in progress for user %s, skipping", userID) - return - } - dw.activeHashCalculations[userID] = true - dw.hashMutex.Unlock() - - defer func() { - // Clean up tracking when done - dw.hashMutex.Lock() - delete(dw.activeHashCalculations, userID) - dw.hashMutex.Unlock() - glog.V(3).Infof("DataWatcher: Hash calculation tracking cleaned up for user %s", userID) - }() - - // Wait a short time to ensure all source processing locks are released - time.Sleep(100 * time.Millisecond) - glog.V(3).Infof("DataWatcher: Starting hash calculation for user %s", userID) - - // Call the hash calculation function directly - dw.calculateAndSetUserHashDirect(userID, userData) - }() - } else { - glog.V(3).Infof("DataWatcher: No apps moved and hash exists for user %s, skipping hash calculation", userID) - } + // Hash calculation is deferred to Pipeline Phase 5. + // The caller (Pipeline.phaseHydrateApps) tracks affected users and + // Phase 5 will calculate hashes for all affected users in one pass. return totalProcessed, totalMoved } @@ -357,56 +325,38 @@ func (dw *DataWatcher) calculateAndSetUserHashWithRetry(userID string, userData glog.Errorf("DataWatcher: Hash calculation failed after %d attempts for user %s", maxRetries, userID) } -// calculateAndSetUserHashDirect calculates hash without tracking (used internally by goroutines) +// calculateAndSetUserHashDirect calculates and updates hash for a single user. +// Does NOT call ForceSync — the caller (Pipeline Phase 5) is responsible for syncing. 
func (dw *DataWatcher) calculateAndSetUserHashDirect(userID string, userData *types.UserData) bool { - glog.V(2).Infof("Serial pipeline: DataWatcherApp, user: %s", userID) glog.V(3).Infof("DataWatcher: Starting direct hash calculation for user %s", userID) - // Get the original user data from cache manager to ensure we have the latest reference originalUserData := dw.cacheManager.GetUserData(userID) if originalUserData == nil { glog.Errorf("DataWatcher: Failed to get user data from cache manager for user %s", userID) return false } - // Create snapshot for hash calculation without holding any locks - glog.V(3).Infof("DataWatcher: Creating user data snapshot for user %s", userID) snapshot, err := utils.CreateUserDataSnapshot(userID, originalUserData) if err != nil { glog.Errorf("DataWatcher: Failed to create user data snapshot for user %s: %v", userID, err) return false } - glog.V(4).Infof("DataWatcher: Calculating hash for user %s", userID) - // Calculate hash using the snapshot newHash, err := utils.CalculateUserDataHash(snapshot) if err != nil { glog.Errorf("DataWatcher: Failed to calculate hash for user %s: %v", userID, err) return false } - // Get current hash for comparison currentHash := originalUserData.Hash - glog.V(3).Infof("DataWatcher: Hash comparison for user %s - current: '%s', new: '%s'", userID, currentHash, newHash) - if currentHash == newHash { glog.V(2).Infof("DataWatcher: Hash unchanged for user %s: %s", userID, newHash) return true } glog.V(2).Infof("DataWatcher: Hash changed for user %s: %s -> %s", userID, currentHash, newHash) - dw.cacheManager.SetUserHash(userID, newHash) - glog.V(3).Infof("DataWatcher: Hash updated for user %s", userID) - - if err := dw.cacheManager.ForceSync(); err != nil { - glog.Errorf("DataWatcher: Failed to force sync after hash update for user %s: %v", userID, err) - return false - } else { - glog.V(2).Infof("DataWatcher: Force sync completed after hash update for user %s", userID) - } - return true } @@ -979,6 +929,31 
@@ func (dw *DataWatcher) ForceCalculateAllUsersHash() error { return nil } +// MarkUserDirty marks a user as needing hash recalculation. +// Called by event-driven paths (e.g. DataWatcherState) that modify user data +// outside the Pipeline cycle. The dirty users will be picked up by Pipeline Phase 5. +func (dw *DataWatcher) MarkUserDirty(userID string) { + dw.dirtyUsersMutex.Lock() + defer dw.dirtyUsersMutex.Unlock() + dw.dirtyUsers[userID] = true + glog.V(3).Infof("DataWatcher: Marked user %s as dirty for deferred hash calculation", userID) +} + +// CollectAndClearDirtyUsers returns all dirty user IDs and clears the set. +// Called by Pipeline Phase 5 to collect users that need hash recalculation +// from event-driven paths. +func (dw *DataWatcher) CollectAndClearDirtyUsers() map[string]bool { + dw.dirtyUsersMutex.Lock() + defer dw.dirtyUsersMutex.Unlock() + if len(dw.dirtyUsers) == 0 { + return nil + } + result := dw.dirtyUsers + dw.dirtyUsers = make(map[string]bool) + glog.V(3).Infof("DataWatcher: Collected %d dirty users for hash calculation", len(result)) + return result +} + // getAppVersion extracts app version from pending app data func (dw *DataWatcher) getAppVersion(pendingApp *types.AppInfoLatestPendingData) string { if pendingApp == nil { diff --git a/internal/v2/appinfo/datawatcher_repo.go b/internal/v2/appinfo/datawatcher_repo.go index 4e2235f..a9b930a 100644 --- a/internal/v2/appinfo/datawatcher_repo.go +++ b/internal/v2/appinfo/datawatcher_repo.go @@ -158,13 +158,14 @@ func (dwr *DataWatcherRepo) StartWithOptions(enablePolling bool) error { return nil } -// ProcessOnce executes one round of state change processing, called by serial pipeline -func (dwr *DataWatcherRepo) ProcessOnce() { +// ProcessOnce executes one round of state change processing, called by Pipeline Phase 3. +// Returns the set of affected user IDs whose data was modified. 
+func (dwr *DataWatcherRepo) ProcessOnce() map[string]bool { if !dwr.isRunning { - return + return nil } - dwr.processStateChanges() + return dwr.processStateChanges() } // Stop stops the periodic state checking process @@ -219,30 +220,29 @@ func (dwr *DataWatcherRepo) monitorStateChanges() { } // processStateChanges fetches and processes new state changes -func (dwr *DataWatcherRepo) processStateChanges() error { +func (dwr *DataWatcherRepo) processStateChanges() map[string]bool { glog.V(3).Infof("Processing state changes after ID: %d", dwr.lastProcessedID) + affectedUsers := make(map[string]bool) - // Fetch new state changes from API stateChanges, err := dwr.fetchStateChanges(dwr.lastProcessedID) if err != nil { - return fmt.Errorf("failed to fetch state changes: %w", err) + glog.Errorf("Failed to fetch state changes: %v", err) + return affectedUsers } if len(stateChanges) == 0 { glog.V(3).Info("No new state changes found") - return nil + return affectedUsers } glog.V(2).Infof("Found %d new state changes", len(stateChanges)) - // Sort state changes by ID to ensure proper order sort.Slice(stateChanges, func(i, j int) bool { return stateChanges[i].ID < stateChanges[j].ID }) glog.V(2).Info("State changes sorted by ID, processing in order...") - // Process state changes in order by ID var lastProcessedID int64 for _, change := range stateChanges { if err := dwr.processStateChange(change); err != nil { @@ -250,17 +250,28 @@ func (dwr *DataWatcherRepo) processStateChanges() error { continue } + // Track affected users from each change type + if change.AppData != nil && change.AppData.UserID != "" { + affectedUsers[change.AppData.UserID] = true + } + if change.Type == "image_info_updated" { + // Image updates affect all users + allUsers := dwr.cacheManager.GetAllUsersData() + for userID := range allUsers { + affectedUsers[userID] = true + } + } + lastProcessedID = change.ID } - // Update the last processed ID in Redis ctx := context.Background() err = 
dwr.redisClient.client.Set(ctx, "datawatcher:last_processed_id", strconv.FormatInt(lastProcessedID, 10), 0).Err() if err != nil { glog.Errorf("Failed to update last processed ID in Redis: %v", err) } - return nil + return affectedUsers } // fetchStateChanges calls the /state-changes API to get new state changes @@ -380,17 +391,7 @@ func (dwr *DataWatcherRepo) handleImageInfoUpdated(change *StateChange) error { updatedCount := dwr.updateImageInfoInCache(imageName, updatedImageInfo) glog.V(3).Infof("Updated image info for %s in %d cache entries", imageName, updatedCount) - // Step 3: Trigger hash calculation for all users - if dwr.dataWatcher != nil { - if err := dwr.dataWatcher.ForceCalculateAllUsersHash(); err != nil { - glog.Errorf("Failed to trigger hash calculation for all users: %v", err) - return fmt.Errorf("failed to trigger hash calculation: %w", err) - } - glog.V(3).Info("Successfully triggered hash calculation for all users after image update") - } else { - glog.V(3).Info("DataWatcher not available, skipping hash calculation") - } - + // Hash calculation is deferred to Pipeline Phase 5. 
glog.V(2).Infof("Successfully handled image info updated for image: %s", imageName) return nil } diff --git a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index 0de0be8..bdf09ed 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -989,16 +989,9 @@ func (dw *DataWatcherState) storeStateToCache(msg AppStateMessage) { glog.V(2).Infof("Successfully stored app state to cache for user=%s, source=%s, app=%s, state=%s", userID, sourceID, msg.Name, msg.State) - // Call ForceCalculateAllUsersHash for hash calculation after successful cache update + // Mark user as dirty for deferred hash calculation in Pipeline Phase 5 if dw.dataWatcher != nil { - glog.V(3).Infof("Triggering hash recalculation for all users after cache update") - if err := dw.dataWatcher.ForceCalculateAllUsersHash(); err != nil { - glog.Errorf("Failed to force calculate all users hash: %v", err) - } else { - glog.V(2).Infof("Successfully triggered hash recalculation for all users") - } - } else { - glog.V(3).Infof("DataWatcher not available, skipping hash recalculation") + dw.dataWatcher.MarkUserDirty(userID) } } } diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 4d7fb09..ed8cc65 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -101,11 +101,31 @@ func (p *Pipeline) run(ctx context.Context) { startTime := time.Now() + // Phase 1-4: only modify data, no hash calculation or ForceSync p.phaseSyncer(ctx) - affectedUsers := p.phaseHydrateApps(ctx) - p.phaseDataWatcherRepo(ctx) - p.phaseStatusCorrection(ctx) - p.phaseHashAndSync(affectedUsers) + hydrateUsers := p.phaseHydrateApps(ctx) + repoUsers := p.phaseDataWatcherRepo(ctx) + statusUsers := p.phaseStatusCorrection(ctx) + + // Phase 5: merge all affected users + dirty users, calculate hash once, sync once + allAffected := make(map[string]bool) + for u := range hydrateUsers { + allAffected[u] = true + } 
+ for u := range repoUsers { + allAffected[u] = true + } + for u := range statusUsers { + allAffected[u] = true + } + // Collect dirty users from event-driven paths (DataWatcherState) + if p.dataWatcher != nil { + for u := range p.dataWatcher.CollectAndClearDirtyUsers() { + allAffected[u] = true + } + } + + p.phaseHashAndSync(allAffected) if elapsed := time.Since(startTime); elapsed > 5*time.Second { glog.V(2).Infof("Pipeline: cycle completed in %v", elapsed) @@ -168,40 +188,42 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { } // phaseDataWatcherRepo processes chart-repo state changes -func (p *Pipeline) phaseDataWatcherRepo(ctx context.Context) { +func (p *Pipeline) phaseDataWatcherRepo(ctx context.Context) map[string]bool { if p.dataWatcherRepo == nil { - return + return nil } select { case <-ctx.Done(): - return + return nil case <-p.stopChan: - return + return nil default: } glog.V(3).Info("Pipeline Phase 3: DataWatcherRepo") - p.dataWatcherRepo.ProcessOnce() + return p.dataWatcherRepo.ProcessOnce() } // phaseStatusCorrection corrects app running statuses -func (p *Pipeline) phaseStatusCorrection(ctx context.Context) { +func (p *Pipeline) phaseStatusCorrection(ctx context.Context) map[string]bool { if p.statusCorrectionChecker == nil { - return + return nil } select { case <-ctx.Done(): - return + return nil case <-p.stopChan: - return + return nil default: } glog.V(3).Info("Pipeline Phase 4: StatusCorrectionChecker") - p.statusCorrectionChecker.PerformStatusCheckOnce() + return p.statusCorrectionChecker.PerformStatusCheckOnce() } -// phaseHashAndSync calculates user hashes and syncs to Redis +// phaseHashAndSync calculates user hashes for all affected users and syncs to Redis. +// This is the single point where hash calculation and ForceSync happen per Pipeline cycle. 
func (p *Pipeline) phaseHashAndSync(affectedUsers map[string]bool) { if p.dataWatcher != nil && len(affectedUsers) > 0 { + glog.V(2).Infof("Pipeline Phase 5: calculating hash for %d affected users", len(affectedUsers)) for userID := range affectedUsers { userData := p.cacheManager.GetUserData(userID) if userData != nil { @@ -211,7 +233,7 @@ func (p *Pipeline) phaseHashAndSync(affectedUsers map[string]bool) { } if p.cacheManager != nil { if err := p.cacheManager.ForceSync(); err != nil { - glog.Errorf("Pipeline: ForceSync failed: %v", err) + glog.Warningf("Pipeline: ForceSync rate limited: %v", err) } } } diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index e5c7c9d..285ff87 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -128,12 +128,13 @@ func (scc *StatusCorrectionChecker) StartWithOptions(enablePeriodicCheck bool) e return nil } -// PerformStatusCheckOnce executes one status check cycle, called by serial pipeline -func (scc *StatusCorrectionChecker) PerformStatusCheckOnce() { +// PerformStatusCheckOnce executes one status check cycle, called by Pipeline Phase 4. +// Returns the set of affected user IDs whose data was modified. 
+func (scc *StatusCorrectionChecker) PerformStatusCheckOnce() map[string]bool { if !scc.isRunning { - return + return nil } - scc.performStatusCheck() + return scc.performStatusCheck() } // Stop stops the periodic status checking @@ -200,8 +201,9 @@ func (scc *StatusCorrectionChecker) runPeriodicCheck() { } // performStatusCheck performs a single status check cycle -func (scc *StatusCorrectionChecker) performStatusCheck() { +func (scc *StatusCorrectionChecker) performStatusCheck() map[string]bool { startTime := time.Now() + result := make(map[string]bool) scc.mutex.Lock() scc.lastCheckTime = startTime @@ -210,25 +212,22 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.Infof("Starting status check cycle #%d", scc.checkCount) - // Fetch latest status from app-service latestStatus, err := scc.fetchLatestStatus() if err != nil { glog.Errorf("Failed to fetch latest status from app-service: %v", err) - return + return result } glog.V(2).Infof("Fetched status for %d applications and middlewares from app-service", len(latestStatus)) - // Get current status from cache cachedStatus := scc.getCachedStatus() if len(cachedStatus) == 0 { glog.Infof("No cached status found, skipping comparison") - return + return result } glog.V(2).Infof("Found cached status for %d applications and middlewares", len(cachedStatus)) - // Compare and detect changes changes := scc.compareStatus(latestStatus, cachedStatus) glog.V(2).Infof("[UserChanged] Found cached status, changed: %+v", changes) @@ -237,18 +236,16 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.V(2).Infof("Detected %d status changes, applying corrections", len(changes)) scc.applyCorrections(changes, latestStatus) - // After applying corrections, recalculate and update user data hash for all affected users. - // This ensures the hash stays consistent with the latest user data state. - // The hash calculation logic is consistent with DataWatcher (see datawatcher_app.go). 
- // affectedUsers := make(map[string]struct{}) - affectedUsers := make(map[string]*StatusChange) + // Apply UserInfo changes and collect affected users. + // Hash calculation and ForceSync are deferred to Pipeline Phase 5. + changesByUser := make(map[string]*StatusChange) for _, change := range changes { - affectedUsers[change.UserID] = &change //change.ChangeType + changesByUser[change.UserID] = &change } - for userID, cs := range affectedUsers { + for userID, cs := range changesByUser { userData := scc.cacheManager.GetUserData(userID) if userData == nil { - glog.V(3).Infof("StatusCorrectionChecker: userData not found for user %s, skip hash calculation", userID) + glog.V(3).Infof("StatusCorrectionChecker: userData not found for user %s", userID) continue } @@ -263,23 +260,7 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.V(2).Infof("[UserChanged] userId: %s, userInfo is null", cs.UserID) } - // Generate snapshot for hash calculation (reuse logic from DataWatcher) - snapshot, err := utils.CreateUserDataSnapshot(userID, userData) - if err != nil { - glog.Errorf("StatusCorrectionChecker: failed to create snapshot for user %s: %v", userID, err) - continue - } - newHash, err := utils.CalculateUserDataHash(snapshot) - if err != nil { - glog.Errorf("StatusCorrectionChecker: failed to calculate hash for user %s: %v", userID, err) - continue - } - scc.cacheManager.SetUserHash(userID, newHash) - glog.V(2).Infof("StatusCorrectionChecker: user %s hash updated to %s", userID, newHash) - } - // Force sync after hash update - if err := scc.cacheManager.ForceSync(); err != nil { - glog.Errorf("StatusCorrectionChecker: ForceSync failed after hash update: %v", err) + result[userID] = true } scc.mutex.Lock() @@ -289,10 +270,10 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.V(3).Info("No status changes detected") } - // Check and correct task statuses scc.checkAndCorrectTaskStatuses(latestStatus) glog.V(2).Infof("Status check cycle #%d 
completed in %v", scc.checkCount, time.Since(startTime)) + return result } // fetchLatestStatus fetches the latest status from app-service From b0b939ceb857880af9a91847b8c6055d105089f6 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 4 Mar 2026 04:01:28 +0000 Subject: [PATCH 12/45] fix: ensure pipeline syncs data for newly added sources Two bugs were preventing newly added sources from being processed promptly: 1. SyncOnce throttle ignored source config changes: The syncer's SyncOnce() only checked a time-based throttle (syncInterval, default 5min). When a new remote source was added, the syncer would not run until the throttle expired, leaving the new source without app data for up to 5 minutes. Added hasRemoteSourceConfigChanged() to detect when the set of remote source IDs changes and force an immediate sync cycle. 2. LatestData initialized as non-nil empty struct: In NewSyncContextWithManager, LatestData was initialized as &AppStoreInfoResponse{} instead of nil. When sync steps were skipped (hash match for existing sources), the check 'syncContext.LatestData != nil' still passed, causing storeDataViaCacheManager to run with empty data. This unnecessarily cleared AppInfoLatestPending for hash-matched sources. Changed initialization to nil so the store code only runs when DataFetchStep actually populates LatestData. 
Co-authored-by: aby913 --- internal/v2/appinfo/syncer.go | 41 ++++++++++++++++++- .../v2/appinfo/syncerfn/step_interface.go | 2 +- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index dcb88ab..25dbf07 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -3,6 +3,8 @@ package appinfo import ( "context" "fmt" + "sort" + "strings" "sync" "sync/atomic" "time" @@ -28,6 +30,8 @@ type Syncer struct { lastSyncExecuted time.Time // Last time a full sync cycle was actually executed + lastKnownRemoteSourceIDs atomic.Value // string: sorted comma-joined remote source IDs from last sync + // Status tracking fields lastSyncTime atomic.Value // time.Time lastSyncSuccess atomic.Value // time.Time @@ -64,6 +68,7 @@ func NewSyncer(cache *CacheData, syncInterval time.Duration, settingsManager *se s.currentStep.Store("") s.lastSyncDuration.Store(time.Duration(0)) s.currentSource.Store("") + s.lastKnownRemoteSourceIDs.Store("") return s } @@ -133,21 +138,53 @@ func (s *Syncer) StartWithOptions(ctx context.Context, enableSyncLoop bool) erro } // SyncOnce executes one sync cycle if at least syncInterval has elapsed -// since the last execution. Called by Pipeline on every tick. +// since the last execution, OR if the remote source configuration has changed +// (e.g. a new source was added). Called by Pipeline on every tick. 
func (s *Syncer) SyncOnce(ctx context.Context) { if !s.isRunning.Load() { return } - if !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval { + + sourceChanged := s.hasRemoteSourceConfigChanged() + + if !sourceChanged && !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval { glog.V(3).Infof("SyncOnce: skipping, last sync was %v ago (interval: %v)", time.Since(s.lastSyncExecuted), s.syncInterval) return } + if sourceChanged { + glog.V(2).Info("SyncOnce: remote source configuration changed, forcing sync cycle") + } s.lastSyncExecuted = time.Now() if err := s.executeSyncCycle(ctx); err != nil { glog.Errorf("SyncOnce: sync cycle failed: %v", err) } } +// hasRemoteSourceConfigChanged checks if the set of remote market sources has +// changed since the last sync cycle (e.g. a source was added or removed). +func (s *Syncer) hasRemoteSourceConfigChanged() bool { + config := s.settingsManager.GetMarketSources() + if config == nil || len(config.Sources) == 0 { + return false + } + + var remoteIDs []string + for _, src := range config.Sources { + if src.Type == "remote" { + remoteIDs = append(remoteIDs, src.ID) + } + } + sort.Strings(remoteIDs) + currentKey := strings.Join(remoteIDs, ",") + + lastKnown, _ := s.lastKnownRemoteSourceIDs.Load().(string) + if currentKey != lastKnown { + s.lastKnownRemoteSourceIDs.Store(currentKey) + return lastKnown != "" // first call (empty -> populated) is not a "change" + } + return false +} + // Stop stops the synchronization process func (s *Syncer) Stop() { if !s.mutex.TryLock() { diff --git a/internal/v2/appinfo/syncerfn/step_interface.go b/internal/v2/appinfo/syncerfn/step_interface.go index e7e2945..bbf9b1d 100644 --- a/internal/v2/appinfo/syncerfn/step_interface.go +++ b/internal/v2/appinfo/syncerfn/step_interface.go @@ -68,7 +68,7 @@ func NewSyncContextWithManager(cache *types.CacheData, cacheManager types.CacheM Client: resty.New(), Cache: cache, CacheManager: cacheManager, - 
LatestData: &AppStoreInfoResponse{}, + LatestData: nil, DetailedApps: make(map[string]interface{}), AppIDs: make([]string, 0), Errors: make([]error, 0), From 177b24582bb328ff0a6966050061cfc4ec8b4c2b Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 4 Mar 2026 04:13:30 +0000 Subject: [PATCH 13/45] fix: also detect user list changes in SyncOnce throttle bypass Extend hasSyncRelevantConfigChanged (renamed from hasRemoteSourceConfigChanged) to also track the user ID list via CacheManager.GetUserIDs(). When a user is added or removed, the syncer now detects the change on the next pipeline tick and forces an immediate sync cycle, ensuring new users get their app data without waiting for the full sync interval to expire. Co-authored-by: aby913 --- internal/v2/appinfo/syncer.go | 66 +++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index 25dbf07..d9fb0e0 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -31,6 +31,7 @@ type Syncer struct { lastSyncExecuted time.Time // Last time a full sync cycle was actually executed lastKnownRemoteSourceIDs atomic.Value // string: sorted comma-joined remote source IDs from last sync + lastKnownUserIDs atomic.Value // string: sorted comma-joined user IDs from last sync // Status tracking fields lastSyncTime atomic.Value // time.Time @@ -69,6 +70,7 @@ func NewSyncer(cache *CacheData, syncInterval time.Duration, settingsManager *se s.lastSyncDuration.Store(time.Duration(0)) s.currentSource.Store("") s.lastKnownRemoteSourceIDs.Store("") + s.lastKnownUserIDs.Store("") return s } @@ -138,21 +140,21 @@ func (s *Syncer) StartWithOptions(ctx context.Context, enableSyncLoop bool) erro } // SyncOnce executes one sync cycle if at least syncInterval has elapsed -// since the last execution, OR if the remote source configuration has changed -// (e.g. a new source was added). 
Called by Pipeline on every tick. +// since the last execution, OR if the sync-relevant configuration has changed +// (e.g. a new source or user was added/removed). Called by Pipeline on every tick. func (s *Syncer) SyncOnce(ctx context.Context) { if !s.isRunning.Load() { return } - sourceChanged := s.hasRemoteSourceConfigChanged() + configChanged, reason := s.hasSyncRelevantConfigChanged() - if !sourceChanged && !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval { + if !configChanged && !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval { glog.V(3).Infof("SyncOnce: skipping, last sync was %v ago (interval: %v)", time.Since(s.lastSyncExecuted), s.syncInterval) return } - if sourceChanged { - glog.V(2).Info("SyncOnce: remote source configuration changed, forcing sync cycle") + if configChanged { + glog.V(2).Infof("SyncOnce: %s, forcing sync cycle", reason) } s.lastSyncExecuted = time.Now() if err := s.executeSyncCycle(ctx); err != nil { @@ -160,29 +162,47 @@ func (s *Syncer) SyncOnce(ctx context.Context) { } } -// hasRemoteSourceConfigChanged checks if the set of remote market sources has -// changed since the last sync cycle (e.g. a source was added or removed). -func (s *Syncer) hasRemoteSourceConfigChanged() bool { +// hasSyncRelevantConfigChanged checks whether the remote source list or the +// user list has changed since the last sync cycle. Returns true with a +// human-readable reason when a change is detected. 
+func (s *Syncer) hasSyncRelevantConfigChanged() (changed bool, reason string) { + // Check remote sources config := s.settingsManager.GetMarketSources() - if config == nil || len(config.Sources) == 0 { - return false + if config != nil && len(config.Sources) > 0 { + var remoteIDs []string + for _, src := range config.Sources { + if src.Type == "remote" { + remoteIDs = append(remoteIDs, src.ID) + } + } + sort.Strings(remoteIDs) + currentKey := strings.Join(remoteIDs, ",") + + lastKnown, _ := s.lastKnownRemoteSourceIDs.Load().(string) + if currentKey != lastKnown { + s.lastKnownRemoteSourceIDs.Store(currentKey) + if lastKnown != "" { + return true, "remote source configuration changed" + } + } } - var remoteIDs []string - for _, src := range config.Sources { - if src.Type == "remote" { - remoteIDs = append(remoteIDs, src.ID) + // Check user list + if cm := s.cacheManager.Load(); cm != nil { + userIDs := cm.GetUserIDs() + sort.Strings(userIDs) + currentKey := strings.Join(userIDs, ",") + + lastKnown, _ := s.lastKnownUserIDs.Load().(string) + if currentKey != lastKnown { + s.lastKnownUserIDs.Store(currentKey) + if lastKnown != "" { + return true, "user list changed" + } } } - sort.Strings(remoteIDs) - currentKey := strings.Join(remoteIDs, ",") - lastKnown, _ := s.lastKnownRemoteSourceIDs.Load().(string) - if currentKey != lastKnown { - s.lastKnownRemoteSourceIDs.Store(currentKey) - return lastKnown != "" // first call (empty -> populated) is not a "change" - } - return false + return false, "" } // Stop stops the synchronization process From 180ae716480549c189bff089994f974729fc2ef0 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 4 Mar 2026 06:01:38 +0000 Subject: [PATCH 14/45] fix: add V(2) diagnostic logs for silently skipped hydration apps HydrateSingleApp had several early-return paths that silently returned false without any log output at V(2) level, making it impossible to diagnose why pending apps were not being processed. 
Added V(2) log lines for three skip conditions: - App is in the render failed list (will retry after 5min cleanup) - App is already in the latest queue with the same version - convertApplicationInfoEntryToMap returned empty data Co-authored-by: aby913 --- internal/v2/appinfo/pipeline.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index ed8cc65..15cd671 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -266,6 +266,8 @@ func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string } if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { + glog.V(2).Infof("HydrateSingleApp: skipping %s %s (user=%s, source=%s) - in render failed list, will retry after cleanup", + appID, appName, userID, sourceID) return false } @@ -278,11 +280,15 @@ func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string version = pendingData.RawData.Version } if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { + glog.V(2).Infof("HydrateSingleApp: skipping %s %s (user=%s, source=%s) - already in latest queue with version %s", + appID, appName, userID, sourceID, version) return false } appDataMap := h.convertApplicationInfoEntryToMap(pendingData.RawData) if len(appDataMap) == 0 { + glog.V(2).Infof("HydrateSingleApp: skipping %s %s (user=%s, source=%s) - convertApplicationInfoEntryToMap returned empty", + appID, appName, userID, sourceID) return false } From aeb7473849c531925ca6a1e8fbc7644c63ba8c44 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 4 Mar 2026 08:13:42 +0000 Subject: [PATCH 15/45] perf: batch-parallel hydration in Pipeline Phase 2 The main bottleneck in Phase 2 is the HTTP call to chart-repo dcr/sync-app in TaskForApiStep (3s timeout). With serial processing, N apps could block the pipeline for up to N*3 seconds. 
Change phaseHydrateApps to process pending apps in concurrent batches: - Default concurrency: 5 (configurable via PIPELINE_HYDRATION_CONCURRENCY) - Each batch fires N goroutines running HydrateSingleApp in parallel - WaitGroup ensures the batch completes before moving to the next - ProcessSingleAppToLatest remains sequential per batch to avoid concurrent writes to the same source's AppInfoLatest slice Cache concurrency is safe because: - All write operations go through CacheManager.mutex.Lock (micro-second hold times, different apps touch different pendingData objects) - Read-only checks (isAppInRenderFailedList, isAppInLatestQueue) use CacheManager.mutex.RLock which allows concurrent readers - findPendingDataFromCache reads without lock but only accesses data that Phase 1 has already finished writing Co-authored-by: aby913 --- internal/v2/appinfo/pipeline.go | 85 ++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 18 deletions(-) diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 15cd671..dc69c06 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -2,6 +2,8 @@ package appinfo import ( "context" + "os" + "strconv" "sync" "sync/atomic" "time" @@ -19,6 +21,8 @@ import ( // Phase 3: DataWatcherRepo - process chart-repo state changes // Phase 4: StatusCorrectionChecker - correct app running statuses // Phase 5: Hash calculation + ForceSync +const defaultHydrationConcurrency = 5 + type Pipeline struct { cacheManager *CacheManager cache *types.CacheData @@ -28,21 +32,29 @@ type Pipeline struct { dataWatcherRepo *DataWatcherRepo statusCorrectionChecker *StatusCorrectionChecker - mutex sync.Mutex - stopChan chan struct{} - isRunning atomic.Bool - interval time.Duration + mutex sync.Mutex + stopChan chan struct{} + isRunning atomic.Bool + interval time.Duration + hydrationConcurrency int } func NewPipeline(cacheManager *CacheManager, cache *types.CacheData, interval time.Duration) *Pipeline 
{ if interval <= 0 { interval = 30 * time.Second } + + concurrency := defaultHydrationConcurrency + if v, err := strconv.Atoi(os.Getenv("PIPELINE_HYDRATION_CONCURRENCY")); err == nil && v > 0 { + concurrency = v + } + return &Pipeline{ - cacheManager: cacheManager, - cache: cache, - stopChan: make(chan struct{}), - interval: interval, + cacheManager: cacheManager, + cache: cache, + stopChan: make(chan struct{}), + interval: interval, + hydrationConcurrency: concurrency, } } @@ -148,7 +160,8 @@ func (p *Pipeline) phaseSyncer(ctx context.Context) { p.syncer.SyncOnce(ctx) } -// phaseHydrateApps processes pending apps one by one through hydration + move to Latest +// phaseHydrateApps processes pending apps in concurrent batches through hydration + move to Latest. +// Batch size is controlled by hydrationConcurrency (default 5, env PIPELINE_HYDRATION_CONCURRENCY). func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { affectedUsers := make(map[string]bool) if p.hydrator == nil || p.cacheManager == nil { @@ -162,9 +175,14 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { } total := len(items) - glog.V(2).Infof("Pipeline Phase 2: processing %d pending apps", total) + batchSize := p.hydrationConcurrency + if batchSize <= 0 { + batchSize = defaultHydrationConcurrency + } + + glog.V(2).Infof("Pipeline Phase 2: processing %d pending apps (concurrency=%d)", total, batchSize) - for idx, item := range items { + for batchStart := 0; batchStart < total; batchStart += batchSize { select { case <-ctx.Done(): return affectedUsers @@ -173,15 +191,46 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { default: } - appID, appName := getAppIdentifiers(item.Pending) - glog.V(2).Infof("Pipeline Phase 2: [%d/%d] %s %s (user=%s, source=%s)", - idx+1, total, appID, appName, item.UserID, item.SourceID) + batchEnd := batchStart + batchSize + if batchEnd > total { + batchEnd = total + } + batch := 
items[batchStart:batchEnd] - hydrated := p.hydrator.HydrateSingleApp(ctx, item.UserID, item.SourceID, item.Pending) - if hydrated && p.dataWatcher != nil { - p.dataWatcher.ProcessSingleAppToLatest(item.UserID, item.SourceID, item.Pending) + // Log batch items + for i, item := range batch { + appID, appName := getAppIdentifiers(item.Pending) + glog.V(2).Infof("Pipeline Phase 2: [%d/%d] %s %s (user=%s, source=%s)", + batchStart+i+1, total, appID, appName, item.UserID, item.SourceID) + } + + // Process batch concurrently + type hydrateResult struct { + idx int + hydrated bool + } + results := make([]hydrateResult, len(batch)) + var wg sync.WaitGroup + + for i, item := range batch { + wg.Add(1) + go func(idx int, it PendingItem) { + defer wg.Done() + results[idx] = hydrateResult{ + idx: idx, + hydrated: p.hydrator.HydrateSingleApp(ctx, it.UserID, it.SourceID, it.Pending), + } + }(i, item) + } + wg.Wait() + + // Move hydrated apps to Latest (sequential — writes to the same source slice) + for i, item := range batch { + if results[i].hydrated && p.dataWatcher != nil { + p.dataWatcher.ProcessSingleAppToLatest(item.UserID, item.SourceID, item.Pending) + } + affectedUsers[item.UserID] = true } - affectedUsers[item.UserID] = true } return affectedUsers From 8232ddb50a50652c3912bae312b240e2b613e115 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 4 Mar 2026 09:22:54 +0000 Subject: [PATCH 16/45] perf: bypass sync throttle on remote hash change; remove ForceSync rate limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SyncOnce: when the 5-minute throttle is active, do a lightweight HTTP probe to each remote source's hash endpoint. If any source's remote hash differs from the locally cached Others.Hash, bypass the throttle and run a full sync cycle immediately. Network errors during the probe are silently ignored (conservative: don't force sync on failure). ForceSync: remove the 1-minute rate limiter. 
In the Pipeline architecture, ForceSync is called exactly once per cycle in Phase 5, so the rate limiter is unnecessary. The Hydrator's databaseSyncMonitor was the only other caller that could trigger it, and it was always hitting the rate limit set by Phase 5 — producing useless error logs every 30 seconds. Co-authored-by: aby913 --- internal/v2/appinfo/cache.go | 42 -------------------- internal/v2/appinfo/syncer.go | 73 +++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 46 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index d8a6c4d..348f9c0 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -44,9 +44,6 @@ type CacheManager struct { unlockCount int64 } - // ForceSync rate limiting - forceSyncMutex sync.Mutex - lastForceSync time.Time } // startLockWatchdog starts a 1s watchdog for write lock sections and returns a stopper. @@ -1693,19 +1690,7 @@ func (cm *CacheManager) requestSync(req SyncRequest) { } // ForceSync forces immediate synchronization of all data to Redis -// Rate limited to once per minute to prevent excessive Redis operations func (cm *CacheManager) ForceSync() error { - // Check rate limiting first - cm.forceSyncMutex.Lock() - now := time.Now() - if !cm.lastForceSync.IsZero() && now.Sub(cm.lastForceSync) < time.Minute { - cm.forceSyncMutex.Unlock() - glog.V(4).Infof("ForceSync: Rate limited, last sync was %v ago", now.Sub(cm.lastForceSync)) - return fmt.Errorf("force sync rate limited, please wait %v", time.Minute-now.Sub(cm.lastForceSync)) - } - cm.lastForceSync = now - cm.forceSyncMutex.Unlock() - glog.V(2).Infof("Force syncing all cache data to Redis") // 1. 
Quickly obtain a data snapshot to minimize lock holding time @@ -1758,33 +1743,6 @@ func (cm *CacheManager) ForceSync() error { } } -// CanForceSync checks if ForceSync can be executed (not rate limited) -func (cm *CacheManager) CanForceSync() bool { - cm.forceSyncMutex.Lock() - defer cm.forceSyncMutex.Unlock() - - now := time.Now() - return cm.lastForceSync.IsZero() || now.Sub(cm.lastForceSync) >= time.Minute -} - -// GetForceSyncCooldown returns the remaining cooldown time for ForceSync -func (cm *CacheManager) GetForceSyncCooldown() time.Duration { - cm.forceSyncMutex.Lock() - defer cm.forceSyncMutex.Unlock() - - now := time.Now() - if cm.lastForceSync.IsZero() { - return 0 - } - - elapsed := now.Sub(cm.lastForceSync) - if elapsed >= time.Minute { - return 0 - } - - return time.Minute - elapsed -} - // GetAllUsersData returns all users data from cache using single global lock func (cm *CacheManager) GetAllUsersData() map[string]*UserData { cm.mutex.RLock() diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index d9fb0e0..f0b088a 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -2,6 +2,7 @@ package appinfo import ( "context" + "encoding/json" "fmt" "sort" "strings" @@ -14,6 +15,7 @@ import ( "market/internal/v2/types" "market/internal/v2/utils" + "github.com/go-resty/resty/v2" "github.com/golang/glog" ) @@ -141,17 +143,24 @@ func (s *Syncer) StartWithOptions(ctx context.Context, enableSyncLoop bool) erro // SyncOnce executes one sync cycle if at least syncInterval has elapsed // since the last execution, OR if the sync-relevant configuration has changed -// (e.g. a new source or user was added/removed). Called by Pipeline on every tick. +// (e.g. a new source or user was added/removed), OR if a remote source's +// data hash has changed (lightweight probe). Called by Pipeline on every tick. 
func (s *Syncer) SyncOnce(ctx context.Context) { if !s.isRunning.Load() { return } configChanged, reason := s.hasSyncRelevantConfigChanged() + throttled := !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval - if !configChanged && !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval { - glog.V(3).Infof("SyncOnce: skipping, last sync was %v ago (interval: %v)", time.Since(s.lastSyncExecuted), s.syncInterval) - return + if !configChanged && throttled { + if s.hasAnyRemoteHashChanged(ctx) { + glog.V(2).Info("SyncOnce: remote data hash changed, forcing sync cycle") + } else { + glog.V(3).Infof("SyncOnce: skipping, last sync was %v ago (interval: %v)", + time.Since(s.lastSyncExecuted), s.syncInterval) + return + } } if configChanged { glog.V(2).Infof("SyncOnce: %s, forcing sync cycle", reason) @@ -162,6 +171,62 @@ func (s *Syncer) SyncOnce(ctx context.Context) { } } +// hasAnyRemoteHashChanged does a lightweight HTTP probe to each remote source's +// hash endpoint and returns true if any source's remote hash differs from the +// locally cached Others.Hash. Errors are silently ignored (conservative: don't +// force sync on network failure). 
+func (s *Syncer) hasAnyRemoteHashChanged(ctx context.Context) bool { + config := s.settingsManager.GetMarketSources() + if config == nil { + return false + } + + endpoints := s.settingsManager.GetAPIEndpoints() + hashPath := "/api/v1/appstore/hash" + if endpoints != nil && endpoints.HashPath != "" { + hashPath = endpoints.HashPath + } + + version := getVersionForSync() + client := resty.New().SetTimeout(3 * time.Second) + + for _, src := range config.Sources { + if src.Type != "remote" { + continue + } + + hashURL := s.settingsManager.BuildAPIURL(src.BaseURL, hashPath) + if strings.HasPrefix(hashURL, "file://") { + continue + } + + resp, err := client.R().SetContext(ctx).SetQueryParam("version", version).Get(hashURL) + if err != nil || resp.StatusCode() != 200 { + continue + } + + var hr struct { + Hash string `json:"hash"` + } + if json.Unmarshal(resp.Body(), &hr) != nil || hr.Hash == "" { + continue + } + + localHash := "" + if cm := s.cacheManager.Load(); cm != nil { + localHash = cm.GetSourceOthersHash(src.ID) + } + + if hr.Hash != localHash { + glog.V(2).Infof("SyncOnce: hash changed for source %s (remote=%s, local=%s)", + src.ID, hr.Hash, localHash) + return true + } + } + + return false +} + // hasSyncRelevantConfigChanged checks whether the remote source list or the // user list has changed since the last sync cycle. Returns true with a // human-readable reason when a change is detected. 
From c7cf9e25bd29aa081786ac71f78d90ad3c5f185a Mon Sep 17 00:00:00 2001 From: aby913 Date: Wed, 4 Mar 2026 17:25:59 +0800 Subject: [PATCH 17/45] refactor: add logs --- internal/v2/appinfo/pipeline.go | 18 ++++++++++-------- .../v2/appinfo/syncerfn/detail_fetch_step.go | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index dc69c06..7328187 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -58,11 +58,13 @@ func NewPipeline(cacheManager *CacheManager, cache *types.CacheData, interval ti } } -func (p *Pipeline) SetSyncer(s *Syncer) { p.syncer = s } -func (p *Pipeline) SetHydrator(h *Hydrator) { p.hydrator = h } -func (p *Pipeline) SetDataWatcher(dw *DataWatcher) { p.dataWatcher = dw } -func (p *Pipeline) SetDataWatcherRepo(dwr *DataWatcherRepo) { p.dataWatcherRepo = dwr } -func (p *Pipeline) SetStatusCorrectionChecker(scc *StatusCorrectionChecker) { p.statusCorrectionChecker = scc } +func (p *Pipeline) SetSyncer(s *Syncer) { p.syncer = s } +func (p *Pipeline) SetHydrator(h *Hydrator) { p.hydrator = h } +func (p *Pipeline) SetDataWatcher(dw *DataWatcher) { p.dataWatcher = dw } +func (p *Pipeline) SetDataWatcherRepo(dwr *DataWatcherRepo) { p.dataWatcherRepo = dwr } +func (p *Pipeline) SetStatusCorrectionChecker(scc *StatusCorrectionChecker) { + p.statusCorrectionChecker = scc +} func (p *Pipeline) Start(ctx context.Context) error { if p.isRunning.Load() { @@ -111,6 +113,8 @@ func (p *Pipeline) run(ctx context.Context) { } defer p.mutex.Unlock() + glog.V(2).Info("Pipeline: [LOOP] cycle start") + startTime := time.Now() // Phase 1-4: only modify data, no hash calculation or ForceSync @@ -139,9 +143,7 @@ func (p *Pipeline) run(ctx context.Context) { p.phaseHashAndSync(allAffected) - if elapsed := time.Since(startTime); elapsed > 5*time.Second { - glog.V(2).Infof("Pipeline: cycle completed in %v", elapsed) - } + glog.V(2).Infof("Pipeline: [LOOP] cycle 
completed in %v", time.Since(startTime)) } // phaseSyncer fetches remote data diff --git a/internal/v2/appinfo/syncerfn/detail_fetch_step.go b/internal/v2/appinfo/syncerfn/detail_fetch_step.go index 05ff09a..b0478fa 100644 --- a/internal/v2/appinfo/syncerfn/detail_fetch_step.go +++ b/internal/v2/appinfo/syncerfn/detail_fetch_step.go @@ -580,7 +580,7 @@ func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string } // IMPORTANT: use MarketSource.ID as the key for Sources map (not Name) sourceID := source.ID - glog.V(2).Infof("Removing all versions of app %s (name: %s) from cache for source: %s (sourceID=%s)", appID, appName, source.Name, sourceID) + glog.V(2).Infof("Removing all versions of app %s (name: %s) from cache for source: %s (sourceID=%s) [SUSPEND/REMOVE]", appID, appName, source.Name, sourceID) if data.CacheManager == nil { glog.V(3).Infof("Warning: CacheManager is nil, cannot remove app from cache") From 6689b2ec32f98ea928d10a5ee35041043c0e8c99 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 4 Mar 2026 09:36:37 +0000 Subject: [PATCH 18/45] fix: log when Phase 2 has no pending apps to process Co-authored-by: aby913 --- internal/v2/appinfo/pipeline.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 7328187..e08da24 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -173,6 +173,7 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { items := p.cacheManager.CollectAllPendingItems() if len(items) == 0 { + glog.V(2).Info("Pipeline Phase 2: no pending apps to process") return affectedUsers } From d2e7df9165f6d328b7fe7555f1e70677fcac814b Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 4 Mar 2026 11:28:20 +0000 Subject: [PATCH 19/45] fix: skip pending items from deleted users/sources in Phase 2 CollectAllPendingItems returns a snapshot taken before async deletions (RemoveUserData, 
SyncMarketSourcesToCache) have completed. Without this check, Phase 2 would attempt to hydrate apps for users or sources that no longer exist, resulting in chart-repo 500 errors and useless render-failed entries. Before batching, verify each pending item's user+source still exists via CacheManager.GetSourceData. Items referencing deleted users or sources are logged and skipped. Co-authored-by: aby913 --- internal/v2/appinfo/pipeline.go | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index e08da24..7d0b247 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -183,6 +183,31 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { batchSize = defaultHydrationConcurrency } + // Filter out items whose user or source has been deleted since collection. + // CollectAllPendingItems returns a snapshot; async deletions (RemoveUserData, + // SyncMarketSourcesToCache) may have removed the user/source in the meantime. 
+ validItems := make([]PendingItem, 0, len(items)) + for _, item := range items { + if p.cacheManager.GetSourceData(item.UserID, item.SourceID) == nil { + appID, appName := getAppIdentifiers(item.Pending) + glog.V(2).Infof("Pipeline Phase 2: skipping %s %s - user %s or source %s no longer exists", + appID, appName, item.UserID, item.SourceID) + continue + } + validItems = append(validItems, item) + } + + if len(validItems) == 0 { + glog.V(2).Infof("Pipeline Phase 2: all %d pending apps filtered out (user/source deleted)", total) + return affectedUsers + } + + if len(validItems) < total { + glog.V(2).Infof("Pipeline Phase 2: %d/%d pending apps remain after filtering deleted users/sources", + len(validItems), total) + } + + total = len(validItems) glog.V(2).Infof("Pipeline Phase 2: processing %d pending apps (concurrency=%d)", total, batchSize) for batchStart := 0; batchStart < total; batchStart += batchSize { @@ -198,7 +223,7 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { if batchEnd > total { batchEnd = total } - batch := items[batchStart:batchEnd] + batch := validItems[batchStart:batchEnd] // Log batch items for i, item := range batch { From 77c4d24e32d06390def79a09fb9571b18821a88d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 4 Mar 2026 11:49:41 +0000 Subject: [PATCH 20/45] fix: update in-memory lastProcessedID after processing state changes processStateChanges wrote the last processed ID to Redis but never updated the in-memory dwr.lastProcessedID field. On the next pipeline cycle, fetchStateChanges used the stale in-memory ID, causing the same batch of state changes (especially image_info_updated events) to be fetched and re-processed every cycle. 
Co-authored-by: aby913 --- internal/v2/appinfo/datawatcher_repo.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/v2/appinfo/datawatcher_repo.go b/internal/v2/appinfo/datawatcher_repo.go index a9b930a..11fd47b 100644 --- a/internal/v2/appinfo/datawatcher_repo.go +++ b/internal/v2/appinfo/datawatcher_repo.go @@ -265,6 +265,8 @@ func (dwr *DataWatcherRepo) processStateChanges() map[string]bool { lastProcessedID = change.ID } + dwr.lastProcessedID = lastProcessedID + ctx := context.Background() err = dwr.redisClient.client.Set(ctx, "datawatcher:last_processed_id", strconv.FormatInt(lastProcessedID, 10), 0).Err() if err != nil { From 0f22ce4d4d3cc5523e1d80ba3867fbc82f9e667d Mon Sep 17 00:00:00 2001 From: aby913 Date: Thu, 5 Mar 2026 13:56:42 +0800 Subject: [PATCH 21/45] refactor: improve pipeline log --- internal/v2/appinfo/cache.go | 51 ++++++++++++++++++++++++--------- internal/v2/appinfo/pipeline.go | 6 ++-- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 348f9c0..2ab40cd 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -22,17 +22,17 @@ import ( // CacheManager manages the in-memory cache and Redis synchronization type CacheManager struct { - cache *CacheData - redisClient *RedisClient - userConfig *UserConfig - stateMonitor *utils.StateMonitor // State monitor for change detection - dataSender *DataSender // Direct data sender for bypassing state monitor - mutex sync.RWMutex - syncChannel chan SyncRequest - stopChannel chan bool - isRunning bool - settingsManager *settings.SettingsManager - cleanupTicker *time.Ticker // Timer for periodic cleanup of AppRenderFailed + cache *CacheData + redisClient *RedisClient + userConfig *UserConfig + stateMonitor *utils.StateMonitor // State monitor for change detection + dataSender *DataSender // Direct data sender for bypassing state monitor + mutex sync.RWMutex + syncChannel chan SyncRequest + 
stopChannel chan bool + isRunning bool + settingsManager *settings.SettingsManager + cleanupTicker *time.Ticker // Timer for periodic cleanup of AppRenderFailed // Lock monitoring lockStats struct { @@ -43,7 +43,6 @@ type CacheManager struct { lockCount int64 unlockCount int64 } - } // startLockWatchdog starts a 1s watchdog for write lock sections and returns a stopper. @@ -851,7 +850,6 @@ func (cm *CacheManager) getSourceData(userID, sourceID string) *SourceData { return nil } - // updateAppStateLatest updates or adds a single app state based on name matching func (cm *CacheManager) updateAppStateLatest(userID, sourceID string, sourceData *SourceData, newAppState *types.AppStateLatestData) { if newAppState == nil { @@ -2517,7 +2515,7 @@ func (cm *CacheManager) ClearAppRenderFailedData() { done := make(chan struct{}, 1) go func() { cm.mutex.Lock() - done <- struct{}{} + done <- struct{}{} }() select { case <-done: @@ -2633,3 +2631,28 @@ func (cm *CacheManager) ListUsers() { } } } + +func (cm *CacheManager) GetCachedData() string { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + var items []map[string]interface{} + + for un, uv := range cm.cache.Users { + var user = make(map[string]interface{}) + var ss = make(map[string]interface{}) + for sn, sv := range uv.Sources { + var apps = make(map[string]interface{}) + apps["latest"] = len(sv.AppInfoLatest) + apps["pending"] = len(sv.AppInfoLatestPending) + apps["failed"] = len(sv.AppRenderFailed) + apps["history"] = len(sv.AppInfoHistory) + ss[sn] = apps + } + user[un] = ss + items = append(items, user) + } + + result, _ := json.Marshal(items) + return string(result) +} diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 7d0b247..1ac91b1 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -108,7 +108,7 @@ func (p *Pipeline) loop(ctx context.Context) { func (p *Pipeline) run(ctx context.Context) { if !p.mutex.TryLock() { - glog.V(3).Info("Pipeline: another 
run in progress, skipping") + glog.Warning("Pipeline: another run in progress, skipping") return } defer p.mutex.Unlock() @@ -143,7 +143,9 @@ func (p *Pipeline) run(ctx context.Context) { p.phaseHashAndSync(allAffected) - glog.V(2).Infof("Pipeline: [LOOP] cycle completed in %v", time.Since(startTime)) + cahedData := p.cacheManager.GetCachedData() + + glog.V(2).Infof("Pipeline: [LOOP] cycle completed in %v, cached: %s", time.Since(startTime), cahedData) } // phaseSyncer fetches remote data From d240ab82666a95e1736a67e72302c4dabf139da2 Mon Sep 17 00:00:00 2001 From: aby913 Date: Thu, 5 Mar 2026 16:04:53 +0800 Subject: [PATCH 22/45] refactor: add logs --- internal/v2/appinfo/cache.go | 14 ++++++++++---- internal/v2/appinfo/datawatcher_state.go | 2 ++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 2ab40cd..2382019 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -1767,10 +1767,7 @@ func (cm *CacheManager) GetAllUsersData() map[string]*UserData { // HasUserStateDataForSource checks if any user has non-empty state data for a specific source func (cm *CacheManager) HasUserStateDataForSource(sourceID string) bool { cm.mutex.RLock() - defer func() { - cm.mutex.RUnlock() - glog.V(4).Infof("[LOCK] cm.mutex.RUnlock() @HasUserStateDataForSource End") - }() + defer cm.mutex.RUnlock() if cm.cache == nil { return false @@ -2647,6 +2644,15 @@ func (cm *CacheManager) GetCachedData() string { apps["pending"] = len(sv.AppInfoLatestPending) apps["failed"] = len(sv.AppRenderFailed) apps["history"] = len(sv.AppInfoHistory) + apps["state"] = len(sv.AppStateLatest) + var status []string + if len(sv.AppStateLatest) > 0 { + for _, state := range sv.AppStateLatest { + status = append(status, fmt.Sprintf("%s_%s", state.Status.Name, state.Status.State)) + } + } + apps["state_apps"] = status + ss[sn] = apps } user[un] = ss diff --git a/internal/v2/appinfo/datawatcher_state.go 
b/internal/v2/appinfo/datawatcher_state.go index bdf09ed..e84b985 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -1157,6 +1157,8 @@ func (dw *DataWatcherState) processDelayedMessagesBatch() { dw.delayedMessagesMutex.Lock() defer dw.delayedMessagesMutex.Unlock() + glog.Info("DatawatcherState process") + now := time.Now() var remaining []*DelayedMessage maxRetries := 10 // Maximum 10 retries (about 20 seconds total) From 432da81b18562769cde88014b88b5c5d29f345a3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 5 Mar 2026 09:33:58 +0000 Subject: [PATCH 23/45] refactor: replace TryLock with Lock in TaskModule, extract lock-protected helpers - Replace all TryLock/TryRLock with Lock/RLock across TaskModule - Extract dequeueNextPendingTask(): atomically moves pending task to running - Extract removeRunningTask(): atomically removes task from running map - Extract completeRunningTask(): atomically finds, updates, and removes a running task - Extract handleTaskFailure(): consolidates 5 identical error handling blocks in executeTask - Move I/O operations (DB persist, NATS notify, history record) outside lock scope in all 7 external signal methods (InstallTaskSucceed, InstallTaskFailed, etc.) - Fix missing RLock in checkRunningTasksStatus (was iterating runningTasks without lock) - Simplify HasPendingOrRunningInstallTask to use blocking RLock This eliminates: - Task leaks in runningTasks when TryLock failed (goroutine retry could also fail) - Lost external signals (InstallTaskSucceed etc. 
returning error on TryLock failure) - Unreliable HasPendingOrRunningInstallTask results causing unnecessary message delays - ~500 lines of duplicated TryLock retry boilerplate Co-authored-by: aby913 --- internal/v2/task/taskmodule.go | 886 +++++++-------------------------- 1 file changed, 191 insertions(+), 695 deletions(-) diff --git a/internal/v2/task/taskmodule.go b/internal/v2/task/taskmodule.go index fbb0f74..6ae7469 100644 --- a/internal/v2/task/taskmodule.go +++ b/internal/v2/task/taskmodule.go @@ -122,108 +122,27 @@ func NewTaskModule() (*TaskModule, error) { // SetHistoryModule sets the history module for recording task events func (tm *TaskModule) SetHistoryModule(historyModule *history.HistoryModule) { - // Retry mechanism for acquiring lock (max 3 attempts with 10ms delay) - maxRetries := 3 - retryDelay := 10 * time.Millisecond - - var lockAcquired bool - for attempt := 0; attempt < maxRetries; attempt++ { - if tm.mu.TryLock() { - lockAcquired = true - break - } - - if attempt < maxRetries-1 { - time.Sleep(retryDelay) - continue - } - } - - if !lockAcquired { - glog.Warningf("[%s] Failed to acquire lock for SetHistoryModule after %d attempts", tm.instanceID, maxRetries) - return - } + tm.mu.Lock() defer tm.mu.Unlock() tm.historyModule = historyModule } // SetDataSender sets the data sender for sending system updates func (tm *TaskModule) SetDataSender(dataSender DataSenderInterface) { - // Retry mechanism for acquiring lock (max 3 attempts with 10ms delay) - maxRetries := 3 - retryDelay := 10 * time.Millisecond - - var lockAcquired bool - for attempt := 0; attempt < maxRetries; attempt++ { - if tm.mu.TryLock() { - lockAcquired = true - break - } - - if attempt < maxRetries-1 { - time.Sleep(retryDelay) - continue - } - } - - if !lockAcquired { - glog.Warningf("[%s] Failed to acquire lock for SetDataSender after %d attempts", tm.instanceID, maxRetries) - return - } + tm.mu.Lock() defer tm.mu.Unlock() tm.dataSender = dataSender } // SetSettingsManager sets 
the settings manager for accessing Redis func (tm *TaskModule) SetSettingsManager(settingsManager *settings.SettingsManager) { - // Retry mechanism for acquiring lock (max 3 attempts with 10ms delay) - maxRetries := 3 - retryDelay := 10 * time.Millisecond - - var lockAcquired bool - for attempt := 0; attempt < maxRetries; attempt++ { - if tm.mu.TryLock() { - lockAcquired = true - break - } - - if attempt < maxRetries-1 { - time.Sleep(retryDelay) - continue - } - } - - if !lockAcquired { - glog.Warningf("[%s] Failed to acquire lock for SetSettingsManager after %d attempts", tm.instanceID, maxRetries) - return - } + tm.mu.Lock() defer tm.mu.Unlock() tm.settingsManager = settingsManager } // AddTask adds a new task to the pending queue func (tm *TaskModule) AddTask(taskType TaskType, appName string, user string, metadata map[string]interface{}, callback TaskCallback) (*Task, error) { - // Retry mechanism for acquiring lock (max 3 attempts with 10ms delay) - maxRetries := 3 - retryDelay := 10 * time.Millisecond - - var lockAcquired bool - for attempt := 0; attempt < maxRetries; attempt++ { - if tm.mu.TryLock() { - lockAcquired = true - break - } - - if attempt < maxRetries-1 { - time.Sleep(retryDelay) - continue - } - } - - if !lockAcquired { - return nil, fmt.Errorf("failed to acquire lock for AddTask after %d attempts", maxRetries) - } - if metadata == nil { metadata = make(map[string]interface{}) } @@ -239,19 +158,16 @@ func (tm *TaskModule) AddTask(taskType TaskType, appName string, user string, me Callback: callback, } - // Add to pending queue first (fast memory operation) + tm.mu.Lock() tm.pendingTasks = append(tm.pendingTasks, task) tm.mu.Unlock() - // Persist task outside of lock (database operation may be slow) if err := tm.persistTask(task); err != nil { glog.Errorf("[%s] Failed to persist task %s: %v", tm.instanceID, task.ID, err) - // Don't return error - task is already in memory queue, will be persisted later } glog.V(2).Infof("[%s] Task added: ID=%s, 
Type=%d, AppName=%s, User=%s, HasCallback=%v", tm.instanceID, task.ID, task.Type, task.AppName, user, callback != nil) - // Record task addition in history (outside of lock) tm.recordTaskHistory(task, user) return task, nil @@ -466,42 +382,126 @@ func (tm *TaskModule) taskExecutor() { } } -// executeNextTask gets the earliest pending task and executes it -func (tm *TaskModule) executeNextTask() { - if !tm.mu.TryLock() { - return +// dequeueNextPendingTask atomically moves the next pending task to running state. +// Returns nil if no pending task is available. +func (tm *TaskModule) dequeueNextPendingTask() *Task { + tm.mu.Lock() + defer tm.mu.Unlock() + + if len(tm.pendingTasks) == 0 { + return nil } - var task *Task - if len(tm.pendingTasks) > 0 { - // Get the first task (FIFO) - task = tm.pendingTasks[0] - tm.pendingTasks = tm.pendingTasks[1:] + task := tm.pendingTasks[0] + tm.pendingTasks = tm.pendingTasks[1:] - // Move to running tasks - task.Status = Running - now := time.Now() - task.StartedAt = &now - tm.runningTasks[task.ID] = task - } + task.Status = Running + now := time.Now() + task.StartedAt = &now + tm.runningTasks[task.ID] = task + return task +} + +// removeRunningTask atomically removes a task from the running tasks map. +func (tm *TaskModule) removeRunningTask(taskID string) { + tm.mu.Lock() + delete(tm.runningTasks, taskID) tm.mu.Unlock() +} + +// completeRunningTask atomically finds a running task by opID or appName+user, +// updates its status, and removes it from the running tasks map. +// Returns the task for I/O operations outside the lock, or an error if not found. 
+func (tm *TaskModule) completeRunningTask( + opID, appName, user string, + taskTypes []TaskType, + status TaskStatus, + result, errorMsg string, +) (*Task, error) { + tm.mu.Lock() + defer tm.mu.Unlock() + + var target *Task + + if opID != "" { + for _, t := range tm.runningTasks { + if t.OpID == opID && matchTaskTypes(t.Type, taskTypes) { + target = t + break + } + } + } + + if target == nil && appName != "" && user != "" { + for _, t := range tm.runningTasks { + if matchTaskTypes(t.Type, taskTypes) && t.AppName == appName && t.User == user { + target = t + break + } + } + } + if target == nil { + return nil, fmt.Errorf("no matching running task found (opID=%s, app=%s, user=%s)", opID, appName, user) + } + + target.Status = status + now := time.Now() + target.CompletedAt = &now + target.Result = result + if errorMsg != "" { + target.ErrorMsg = errorMsg + } + + delete(tm.runningTasks, target.ID) + return target, nil +} + +func matchTaskTypes(t TaskType, types []TaskType) bool { + for _, tt := range types { + if t == tt { + return true + } + } + return false +} + +// executeNextTask gets the earliest pending task and executes it +func (tm *TaskModule) executeNextTask() { + task := tm.dequeueNextPendingTask() if task == nil { return } - // Persist task state outside of lock (database operation may be slow) if err := tm.persistTask(task); err != nil { glog.Errorf("[%s] Failed to persist running task state for %s: %v", tm.instanceID, task.ID, err) } glog.V(2).Infof("[%s] Executing task: ID=%s, Type=%d, AppName=%s", tm.instanceID, task.ID, task.Type, task.AppName) - // Execute the task outside of lock (may take minutes) tm.executeTask(task) } +// handleTaskFailure handles the common failure path for task execution +func (tm *TaskModule) handleTaskFailure(task *Task, result string, err error, failureDesc string) { + glog.Errorf("[TASK] %s for task: %s, name: %s, error: %v", failureDesc, task.ID, task.AppName, err) + task.Result = result + task.Status = Failed + 
task.ErrorMsg = fmt.Sprintf("%s: %v, task: %s", failureDesc, err, task.ID) + now := time.Now() + task.CompletedAt = &now + + if task.Callback != nil { + task.Callback(result, err) + } + + tm.removeRunningTask(task.ID) + tm.finalizeTaskPersistence(task) + tm.sendTaskFinishedUpdate(task, "failed") + tm.recordTaskResult(task, result, err) +} + // executeTask executes the actual task logic func (tm *TaskModule) executeTask(task *Task) { var result string @@ -510,264 +510,59 @@ func (tm *TaskModule) executeTask(task *Task) { glog.V(2).Infof("[TASK] Starting task execution: ID=%s, Type=%s, App=%s, User=%s", task.ID, getTaskTypeString(task.Type), task.AppName, task.User) - // Send task execution system update tm.sendTaskExecutionUpdate(task) switch task.Type { case InstallApp: - // Execute app installation glog.V(2).Infof("[TASK] Executing app installation for task: %s", task.ID) result, err = tm.AppInstall(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App installation failed for task: %s, name: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Installation failed: %v, task: %s", err, task.ID) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("Removed failed task from running tasks (retry): ID=%s", 
taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, "failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Installation failed") return } glog.V(2).Infof("[TASK] App installation completed successfully for task: %s", task.ID) case UninstallApp: - // Execute app uninstallation glog.V(2).Infof("[TASK] Executing app uninstallation for task: %s", task.ID) result, err = tm.AppUninstall(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App uninstallation failed for task: %s, name: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Uninstallation failed: %v", err) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, 
"failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Uninstallation failed") return } glog.V(2).Infof("[TASK] App uninstallation completed successfully for task: %s", task.ID) case CancelAppInstall: - // Execute app cancel - cancel running install tasks glog.V(2).Infof("[TASK] Executing app cancel for task: %s", task.ID) - - // First, call AppCancel to send cancel request to app service result, err = tm.AppCancel(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App cancel failed for task: %s, name: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Cancel failed: %v", err) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, "failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Cancel failed") return } - // Then, call InstallTaskCanceled to mark the task as canceled in our system - 
err = tm.InstallTaskCanceled(task.AppName, "", "", task.User) - if err != nil { - glog.Errorf("[TASK] InstallTaskCanceled failed for task: %s, app: %s, error: %v", task.ID, task.AppName, err) - // Don't fail the entire operation if InstallTaskCanceled fails - // Just log the error and continue - glog.Errorf("[TASK] Warning: InstallTaskCanceled failed but AppCancel succeeded for task: %s", task.ID) + if cancelErr := tm.InstallTaskCanceled(task.AppName, "", "", task.User); cancelErr != nil { + glog.Errorf("[TASK] InstallTaskCanceled failed for task: %s, app: %s, error: %v", task.ID, task.AppName, cancelErr) } - - glog.V(2).Infof("App cancel completed successfully for task: %s, app: %s", task.ID, task.AppName) + glog.V(2).Infof("[TASK] App cancel completed successfully for task: %s, app: %s", task.ID, task.AppName) case UpgradeApp: - // Execute app upgrade glog.V(2).Infof("[TASK] Executing app upgrade for task: %s", task.ID) result, err = tm.AppUpgrade(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App upgrade failed for task: %s, app: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Upgrade failed: %v", err) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from 
running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, "failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Upgrade failed") return } glog.V(2).Infof("[TASK] App upgrade completed successfully for task: %s, app: %s", task.ID, task.AppName) case CloneApp: - // Execute app clone glog.V(2).Infof("[TASK] Executing app clone for task: %s", task.ID) result, err = tm.AppClone(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App clone failed for task: %s, app: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Clone failed: %v", err) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, 
"failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Clone failed") return } glog.V(2).Infof("[TASK] App clone completed successfully for task: %s, app: %s", task.ID, task.AppName) } - // Task completed successfully task.Result = result task.Status = Completed now := time.Now() @@ -775,34 +570,10 @@ func (tm *TaskModule) executeTask(task *Task) { glog.V(2).Infof("[TASK] Task completed successfully: ID=%s, Type=%s, AppName=%s, User=%s, Duration=%v", task.ID, getTaskTypeString(task.Type), task.AppName, task.User, now.Sub(*task.StartedAt)) - // Log the result summary - glog.V(2).Infof("[TASK] Task result summary: ID=%s, Result length=%d bytes", task.ID, len(result)) - - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed completed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed completed task from running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - + tm.removeRunningTask(task.ID) tm.finalizeTaskPersistence(task) - - // Send task finished system update tm.sendTaskFinishedUpdate(task, "succeed") - // Call callback if exists (for synchronous requests) if task.Callback != nil { glog.V(3).Infof("[TASK] Calling callback for successful task: %s", task.ID) task.Callback(result, nil) @@ -902,6 +673,8 @@ func (tm *TaskModule) statusChecker() { // checkRunningTasksStatus checks the status of all running tasks func (tm *TaskModule) checkRunningTasksStatus() { + tm.mu.RLock() + defer tm.mu.RUnlock() for 
taskID, task := range tm.runningTasks { glog.V(2).Infof("[TASK] Checking status for task: ID=%s", taskID) @@ -1114,23 +887,17 @@ func (tm *TaskModule) GetInstanceID() string { } // HasPendingOrRunningInstallTask checks if there are any pending or running install/clone tasks for the given app and user -// Returns (hasTask, lockAcquired) where hasTask indicates if there are such tasks, and lockAcquired indicates if the lock was successfully acquired -// If lockAcquired is false, the result is unreliable and the caller should handle accordingly (e.g., delay processing) +// Returns (hasTask, lockAcquired) - lockAcquired is always true with blocking lock, kept for API compatibility func (tm *TaskModule) HasPendingOrRunningInstallTask(appName, user string) (hasTask bool, lockAcquired bool) { - if !tm.mu.TryRLock() { - glog.Warningf("[TryLock] failed to acquire lock for HasPendingOrRunningInstallTask, user: %s, app: %s", user, appName) - return false, false - } + tm.mu.RLock() defer tm.mu.RUnlock() - // Check running tasks for _, t := range tm.runningTasks { if t.AppName == appName && t.User == user && (t.Type == InstallApp || t.Type == CloneApp) { return true, true } } - // Check pending tasks for _, t := range tm.pendingTasks { if t.AppName == appName && t.User == user && (t.Type == InstallApp || t.Type == CloneApp) { return true, true @@ -1142,401 +909,130 @@ func (tm *TaskModule) HasPendingOrRunningInstallTask(appName, user string) (hasT // InstallTaskSucceed marks an install or clone task as completed successfully by opID or appName+user func (tm *TaskModule) InstallTaskSucceed(opID, appName, user string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for InstallTaskSucceed, user: %s, opId: %s, app: %s", user, opID, appName) - return fmt.Errorf("failed to acquire lock for InstallTaskSucceed") - } - defer tm.mu.Unlock() - - // First try to find the install or clone task with matching opID in running tasks - var targetTask *Task - for 
_, task := range tm.runningTasks { - if task.OpID == opID && (task.Type == InstallApp || task.Type == CloneApp) { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if (task.Type == InstallApp || task.Type == CloneApp) && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] InstallTaskSucceed - No running install or clone task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running install or clone task found with opID: %s or appName: %s, user: %s", opID, appName, user) + resultMsg := "Installation completed successfully via external signal" + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{InstallApp, CloneApp}, Completed, resultMsg, "") + if err != nil { + glog.Warningf("[%s] InstallTaskSucceed - %v", tm.instanceID, err) + return err } - - // Mark task as completed - targetTask.Status = Completed - now := time.Now() - targetTask.CompletedAt = &now - - // Set appropriate result message based on task type - if targetTask.Type == CloneApp { - targetTask.Result = "Clone completed successfully via external signal" - } else { - targetTask.Result = "Installation completed successfully via external signal" + if task.Type == CloneApp { + task.Result = "Clone completed successfully via external signal" } - taskTypeStr := getTaskTypeString(targetTask.Type) - glog.V(2).Infof("[%s] InstallTaskSucceed - Task marked as completed: ID=%s, Type=%s, OpID=%s, AppName=%s, User=%s, Duration=%v", - tm.instanceID, targetTask.ID, taskTypeStr, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt)) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] InstallTaskSucceed - Removed completed task from 
running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task completion in history - resultMsg := targetTask.Result - tm.recordTaskResult(targetTask, resultMsg, nil) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "succeed") + glog.V(2).Infof("[%s] InstallTaskSucceed - Task completed: ID=%s, Type=%s, OpID=%s, App=%s, User=%s", + tm.instanceID, task.ID, getTaskTypeString(task.Type), task.OpID, task.AppName, task.User) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, nil) + tm.sendTaskFinishedUpdate(task, "succeed") return nil } // InstallTaskFailed marks an install task as failed by opID or appName+user func (tm *TaskModule) InstallTaskFailed(opID, appName, user, errorMsg string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for InstallTaskFailed, user: %s, opId: %s, app: %s, error: %s", user, opID, appName, errorMsg) - return fmt.Errorf("failed to acquire lock for InstallTaskFailed") - } - defer tm.mu.Unlock() - - // First try to find the install task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == InstallApp { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == InstallApp && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] InstallTaskFailed - No running install task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running install task found with opID: %s or appName: %s, user: %s", opID, appName, user) + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{InstallApp}, Failed, "Installation failed via external 
signal", errorMsg) + if err != nil { + glog.Warningf("[%s] InstallTaskFailed - %v", tm.instanceID, err) + return err } - // Mark task as failed - targetTask.Status = Failed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.ErrorMsg = errorMsg - targetTask.Result = "Installation failed via external signal" - - glog.V(2).Infof("[%s] InstallTaskFailed - Task marked as failed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v, Error: %s", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt), errorMsg) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(3).Infof("[%s] InstallTaskFailed - Removed failed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task failure in history - tm.recordTaskResult(targetTask, "Installation failed via external signal", fmt.Errorf(errorMsg)) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "failed") + glog.V(2).Infof("[%s] InstallTaskFailed - Task failed: ID=%s, OpID=%s, App=%s, User=%s, Error: %s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User, errorMsg) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, fmt.Errorf(errorMsg)) + tm.sendTaskFinishedUpdate(task, "failed") return nil } // InstallTaskCanceled marks an install task as canceled by app name and user func (tm *TaskModule) InstallTaskCanceled(appName, appVersion, source, user string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for InstallTaskCanceled, user: %s, source: %s, app: %s, version: %s", user, source, appName, appVersion) - return fmt.Errorf("failed to acquire lock for InstallTaskCanceled") - } - defer tm.mu.Unlock() - - // Find the install task with matching criteria in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.Type == InstallApp && 
task.AppName == appName && task.User == user { - targetTask = task - break - } - } - - if targetTask == nil { - glog.V(2).Infof("[%s] InstallTaskCanceled - No running install task found with appName: %s, user: %s", - tm.instanceID, appName, user) - return fmt.Errorf("no running install task found with appName: %s, user: %s", appName, user) + task, err := tm.completeRunningTask("", appName, user, + []TaskType{InstallApp}, Canceled, "Installation canceled via external signal", "Installation canceled via external signal") + if err != nil { + glog.V(2).Infof("[%s] InstallTaskCanceled - %v", tm.instanceID, err) + return err } - // Mark task as canceled - targetTask.Status = Canceled - now := time.Now() - targetTask.CompletedAt = &now - targetTask.ErrorMsg = "Installation canceled via external signal" - targetTask.Result = "Installation canceled via external signal" - - glog.V(2).Infof("[%s] InstallTaskCanceled - Task marked as canceled: ID=%s, AppName=%s, User=%s, Duration=%v", - tm.instanceID, targetTask.ID, appName, user, now.Sub(*targetTask.StartedAt)) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(3).Infof("[%s] InstallTaskCanceled - Removed canceled task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task cancellation in history - tm.recordTaskResult(targetTask, "Installation canceled via external signal", fmt.Errorf("installation canceled")) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "canceled") + glog.V(2).Infof("[%s] InstallTaskCanceled - Task canceled: ID=%s, App=%s, User=%s", + tm.instanceID, task.ID, task.AppName, task.User) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, fmt.Errorf("installation canceled")) + tm.sendTaskFinishedUpdate(task, "canceled") return nil } // CancelInstallTaskSucceed marks a cancel install task as completed successfully by opID or appName+user func (tm 
*TaskModule) CancelInstallTaskSucceed(opID, appName, user string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for CancelInstallTaskSucceed, user: %s, opId: %s, app: %s", user, opID, appName) - return fmt.Errorf("failed to acquire lock for CancelInstallTaskSucceed") - } - defer tm.mu.Unlock() - - // First try to find the cancel install task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == CancelAppInstall { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == CancelAppInstall && task.AppName == appName && task.User == user { - targetTask = task - break - } - } + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{CancelAppInstall}, Completed, "Cancel installation completed successfully via external signal", "") + if err != nil { + glog.Warningf("[%s] CancelInstallTaskSucceed - %v", tm.instanceID, err) + return err } - if targetTask == nil { - glog.Warningf("[%s] CancelInstallTaskSucceed - No running cancel install task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running cancel install task found with opID: %s or appName: %s, user: %s", opID, appName, user) - } - - // Mark task as completed - targetTask.Status = Completed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.Result = "Cancel installation completed successfully via external signal" - - glog.V(2).Infof("[%s] CancelInstallTaskSucceed - Task marked as completed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt)) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] 
CancelInstallTaskSucceed - Removed completed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task completion in history - tm.recordTaskResult(targetTask, "Cancel installation completed successfully via external signal", nil) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "succeed") + glog.V(2).Infof("[%s] CancelInstallTaskSucceed - Task completed: ID=%s, OpID=%s, App=%s, User=%s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, nil) + tm.sendTaskFinishedUpdate(task, "succeed") return nil } // CancelInstallTaskFailed marks a cancel install task as failed by opID or appName+user func (tm *TaskModule) CancelInstallTaskFailed(opID, appName, user, errorMsg string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for CancelInstallTaskFailed, user: %s, opId: %s, name: %s, error: %s", user, opID, appName, errorMsg) - return fmt.Errorf("failed to acquire lock for CancelInstallTaskFailed") - } - defer tm.mu.Unlock() - - // First try to find the cancel install task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == CancelAppInstall { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == CancelAppInstall && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] CancelInstallTaskFailed - No running cancel install task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running cancel install task found with opID: %s or appName: %s, user: %s", opID, appName, user) + task, err := 
tm.completeRunningTask(opID, appName, user, + []TaskType{CancelAppInstall}, Failed, "Cancel installation failed via external signal", errorMsg) + if err != nil { + glog.Warningf("[%s] CancelInstallTaskFailed - %v", tm.instanceID, err) + return err } - // Mark task as failed - targetTask.Status = Failed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.ErrorMsg = errorMsg - targetTask.Result = "Cancel installation failed via external signal" - - glog.V(2).Infof("[%s] CancelInstallTaskFailed - Task marked as failed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v, Error: %s", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt), errorMsg) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] CancelInstallTaskFailed - Removed failed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task failure in history - tm.recordTaskResult(targetTask, "Cancel installation failed via external signal", fmt.Errorf(errorMsg)) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "failed") + glog.V(2).Infof("[%s] CancelInstallTaskFailed - Task failed: ID=%s, OpID=%s, App=%s, User=%s, Error: %s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User, errorMsg) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, fmt.Errorf(errorMsg)) + tm.sendTaskFinishedUpdate(task, "failed") return nil } // UninstallTaskSucceed marks an uninstall task as completed successfully by opID or appName+user func (tm *TaskModule) UninstallTaskSucceed(opID, appName, user string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for UninstallTaskSucceed, user: %s ,opId: %s, app: %s", user, opID, appName) - return fmt.Errorf("failed to acquire lock for UninstallTaskSucceed") - } - defer tm.mu.Unlock() - - // First try to find the 
uninstall task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == UninstallApp { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == UninstallApp && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] UninstallTaskSucceed - No running uninstall task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running uninstall task found with opID: %s or appName: %s, user: %s", opID, appName, user) + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{UninstallApp}, Completed, "Uninstallation completed successfully via external signal", "") + if err != nil { + glog.Warningf("[%s] UninstallTaskSucceed - %v", tm.instanceID, err) + return err } - // Mark task as completed - targetTask.Status = Completed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.Result = "Uninstallation completed successfully via external signal" - - glog.V(2).Infof("[%s] UninstallTaskSucceed - Task marked as completed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt)) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] UninstallTaskSucceed - Removed completed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task completion in history - tm.recordTaskResult(targetTask, "Uninstallation completed successfully via external signal", nil) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "succeed") + glog.V(2).Infof("[%s] 
UninstallTaskSucceed - Task completed: ID=%s, OpID=%s, App=%s, User=%s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, nil) + tm.sendTaskFinishedUpdate(task, "succeed") return nil } // UninstallTaskFailed marks an uninstall task as failed by opID or appName+user func (tm *TaskModule) UninstallTaskFailed(opID, appName, user, errorMsg string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for UninstallTaskFailed, user: %s, opId: %s, app: %s, error: %s", user, opID, appName, errorMsg) - return fmt.Errorf("failed to acquire lock for UninstallTaskFailed") - } - defer tm.mu.Unlock() - - // First try to find the uninstall task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == UninstallApp { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == UninstallApp && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] UninstallTaskFailed - No running uninstall task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running uninstall task found with opID: %s or appName: %s, user: %s", opID, appName, user) + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{UninstallApp}, Failed, "Uninstallation failed via external signal", errorMsg) + if err != nil { + glog.Warningf("[%s] UninstallTaskFailed - %v", tm.instanceID, err) + return err } - // Mark task as failed - targetTask.Status = Failed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.ErrorMsg = errorMsg - targetTask.Result = "Uninstallation failed via external signal" - - 
glog.V(2).Infof("[%s] UninstallTaskFailed - Task marked as failed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v, Error: %s", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt), errorMsg) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] UninstallTaskFailed - Removed failed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task failure in history - tm.recordTaskResult(targetTask, "Uninstallation failed via external signal", fmt.Errorf(errorMsg)) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "failed") + glog.V(2).Infof("[%s] UninstallTaskFailed - Task failed: ID=%s, OpID=%s, App=%s, User=%s, Error: %s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User, errorMsg) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, fmt.Errorf(errorMsg)) + tm.sendTaskFinishedUpdate(task, "failed") return nil } From fa557a9672e5d241f5ef961dbafc4077019a5152 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 5 Mar 2026 09:48:16 +0000 Subject: [PATCH 24/45] refactor: replace TryRLock with RLock in app_install and app_clone TryRLock failure was silently skipping VC (verifiable credential) injection during install/clone, which could cause payment-related issues. The write lock holder (SetSettingsManager) only does a field assignment (nanoseconds), so RLock wait time is negligible. 
Co-authored-by: aby913 --- internal/v2/task/app_clone.go | 12 +++--------- internal/v2/task/app_install.go | 12 +++--------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/internal/v2/task/app_clone.go b/internal/v2/task/app_clone.go index 50d6e49..ccc4af2 100644 --- a/internal/v2/task/app_clone.go +++ b/internal/v2/task/app_clone.go @@ -7,8 +7,6 @@ import ( "os" "strings" - "market/internal/v2/settings" - "github.com/golang/glog" ) @@ -144,13 +142,9 @@ func (tm *TaskModule) AppClone(task *Task) (string, error) { } // Get VC from purchase receipt using rawAppName and inject into environment variables - var settingsManager *settings.SettingsManager - if tm.mu.TryRLock() { - settingsManager = tm.settingsManager - tm.mu.RUnlock() - } else { - glog.Warningf("Failed to acquire read lock for settingsManager, skipping VC injection for task: %s", task.ID) - } + tm.mu.RLock() + settingsManager := tm.settingsManager + tm.mu.RUnlock() if settingsManager != nil { vc := getVCForClone(settingsManager, user, rawAppName, task.Metadata) diff --git a/internal/v2/task/app_install.go b/internal/v2/task/app_install.go index fca4e2b..df93a9e 100644 --- a/internal/v2/task/app_install.go +++ b/internal/v2/task/app_install.go @@ -9,8 +9,6 @@ import ( "os" "strings" - "market/internal/v2/settings" - "github.com/golang/glog" ) @@ -128,13 +126,9 @@ func (tm *TaskModule) AppInstall(task *Task) (string, error) { } // Get VC from purchase receipt and inject into environment variables - var settingsManager *settings.SettingsManager - if tm.mu.TryRLock() { - settingsManager = tm.settingsManager - tm.mu.RUnlock() - } else { - glog.Warningf("[TryRLock] Failed to acquire read lock for settingsManager, skipping VC injection for task: %s, user: %s, app: %s", task.ID, task.User, task.AppName) - } + tm.mu.RLock() + settingsManager := tm.settingsManager + tm.mu.RUnlock() if settingsManager != nil { vcAppID := appName From 062c7698cec6825313c3e5a8f5d0645821556432 Mon Sep 17 00:00:00 2001 
From: aby913 Date: Thu, 5 Mar 2026 19:56:33 +0800 Subject: [PATCH 25/45] refactor: improve logs --- internal/v2/appinfo/datawatcher_state.go | 2 -- internal/v2/appinfo/hydration.go | 11 +++++++---- internal/v2/appinfo/hydrationfn/task_for_api.go | 2 +- internal/v2/appinfo/syncerfn/detail_fetch_step.go | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index e84b985..bdf09ed 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -1157,8 +1157,6 @@ func (dw *DataWatcherState) processDelayedMessagesBatch() { dw.delayedMessagesMutex.Lock() defer dw.delayedMessagesMutex.Unlock() - glog.Info("DatawatcherState process") - now := time.Now() var remaining []*DelayedMessage maxRetries := 10 // Maximum 10 retries (about 20 seconds total) diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index e0f14b0..047290e 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -265,6 +265,7 @@ func (h *Hydrator) isAppHydrationComplete(pendingData *types.AppInfoLatestPendin glog.V(2).Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis incomplete for appID=%s, name=%s, TotalImages: %d, Images: %v", appID, appName, imageAnalysis.TotalImages, imageAnalysis.Images) return false } + // convertApplicationInfoEntryToMap converts ApplicationInfoEntry to map for task creation func (h *Hydrator) convertApplicationInfoEntryToMap(entry *types.ApplicationInfoEntry) map[string]interface{} { if entry == nil { @@ -425,6 +426,7 @@ func (h *Hydrator) deepCopyValue(value interface{}, visited map[uintptr]bool) in return nil } } + // markTaskCompleted moves task from active to completed func (h *Hydrator) markTaskCompleted(task *hydrationfn.HydrationTask, startedAt time.Time, duration time.Duration) { // Extract file path for cleanup before the lock @@ -593,8 +595,8 @@ func (h 
*Hydrator) moveTaskToRenderFailed(task *hydrationfn.HydrationTask, failu return } - glog.V(2).Infof("Successfully moved task %s (app: %s) to render failed list with reason: %s, step: %s", - task.ID, task.AppID, failureReason, failureStep) + glog.V(2).Infof("Successfully moved task %s (app: %s/%s/%s) to render failed list with reason: %s, step: %s", + task.ID, task.AppID, task.AppName, task.AppVersion, failureReason, failureStep) // Remove from pending list h.removeFromPendingList(task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) @@ -607,7 +609,7 @@ func (h *Hydrator) removeFromPendingList(userID, sourceID, appID, appName, appVe return } h.cacheManager.RemoveFromPendingList(userID, sourceID, appID) - glog.V(2).Infof("Removed app %s from pending list for user: %s, source: %s", appID, userID, sourceID) + glog.V(2).Infof("Removed app %s(%s) from pending list for user: %s, source: %s", appID, appName, userID, sourceID) } // GetMetrics returns hydrator metrics @@ -1051,6 +1053,7 @@ func (h *Hydrator) isAppInLatestQueue(userID, sourceID, appID, appName, version glog.V(3).Infof("DEBUG: isAppInLatestQueue returning %v for appID=%s, version=%s, user=%s, source=%s", result, appID, version, userID, sourceID) return result } + // isAppInRenderFailedList checks if an app already exists in the render failed list func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName string) bool { if h.cacheManager == nil { @@ -1058,4 +1061,4 @@ func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName stri return false } return h.cacheManager.IsAppInRenderFailedList(userID, sourceID, appID) -} \ No newline at end of file +} diff --git a/internal/v2/appinfo/hydrationfn/task_for_api.go b/internal/v2/appinfo/hydrationfn/task_for_api.go index cd4d682..965363d 100644 --- a/internal/v2/appinfo/hydrationfn/task_for_api.go +++ b/internal/v2/appinfo/hydrationfn/task_for_api.go @@ -85,7 +85,7 @@ func (s *TaskForApiStep) Execute(ctx 
context.Context, task *HydrationTask) error Post(url) duration := time.Since(startTime) if err != nil || resp.StatusCode() >= 300 { - glog.Errorf("TaskForApiStep - Request failed in %v for user=%s, source=%s, app=%s: %v", duration, task.UserID, task.SourceID, task.AppID, err) + glog.Errorf("TaskForApiStep - Request failed in %v for user=%s, source=%s, app=%s(%s/%s): %v", duration, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion, err) } if err != nil { return fmt.Errorf("failed to call chart repo sync-app: %w", err) diff --git a/internal/v2/appinfo/syncerfn/detail_fetch_step.go b/internal/v2/appinfo/syncerfn/detail_fetch_step.go index b0478fa..50be47b 100644 --- a/internal/v2/appinfo/syncerfn/detail_fetch_step.go +++ b/internal/v2/appinfo/syncerfn/detail_fetch_step.go @@ -580,7 +580,7 @@ func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string } // IMPORTANT: use MarketSource.ID as the key for Sources map (not Name) sourceID := source.ID - glog.V(2).Infof("Removing all versions of app %s (name: %s) from cache for source: %s (sourceID=%s) [SUSPEND/REMOVE]", appID, appName, source.Name, sourceID) + glog.V(2).Infof("Removing all versions of app %s(%s) from cache for source: %s [SUSPEND/REMOVE]", appID, appName, sourceID) if data.CacheManager == nil { glog.V(3).Infof("Warning: CacheManager is nil, cannot remove app from cache") From d82d54a071e806f890c57652ea9ad201923dacf4 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Thu, 5 Mar 2026 13:00:37 +0000 Subject: [PATCH 26/45] refactor: replace TryLock with Lock/RLock in syncer, hydration, and appinfomodule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syncer.go: - AddStep/RemoveStep/GetSteps: TryLock/TryRLock → Lock/RLock (prevents silent step loss on lock contention) - StartWithOptions/Stop: TryLock → Lock (Stop must not silently fail - syncer would be unstoppable) - syncLoop defer cleanup: TryLock → Lock (ensures isRunning is 
always reset on exit) - updateSyncSuccess/updateSyncFailure: TryLock → Lock (prevents lost status updates) - SetCacheManager: TryLock → Lock (prevents silent configuration loss) hydration.go: - GetMetrics: TryRLock → RLock, simplified fallback-free path - getRecentCompletedTasks/getRecentFailedTasks: TryRLock → RLock - processCompletedTask/processBatchCompletions: TryRLock → RLock - checkAndSyncToDatabase: TryRLock → RLock - monitorMemoryUsage: TryRLock → RLock - cleanupOldCompletedTasks/cleanupOldTasks: TryLock → Lock (prevents memory leak from skipped cleanup) appinfomodule.go: - IsStarted: TryRLock → RLock (no longer returns false on contention) - GetModuleStatus: TryRLock → RLock (no longer returns error status) Kept TryLock in pipeline.go:run() and datawatcher_app.go:processCompletedApps() as timer-guard pattern (skip cycle if previous still running). Co-authored-by: aby913 --- internal/v2/appinfo/appinfomodule.go | 15 +---- internal/v2/appinfo/hydration.go | 87 ++++++---------------------- internal/v2/appinfo/syncer.go | 48 ++++----------- 3 files changed, 32 insertions(+), 118 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index ef0a8fe..859e710 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -402,25 +402,14 @@ func (m *AppInfoModule) GetRedisConfig() *RedisConfig { // IsStarted returns whether the module is currently running func (m *AppInfoModule) IsStarted() bool { - // Boolean read is atomic, but we need to ensure consistency with Start/Stop operations - if !m.mutex.TryRLock() { - glog.Warning("[TryRLock] AppInfoModule.IsStarted: Read lock not available, returning false") - return false - } + m.mutex.RLock() defer m.mutex.RUnlock() return m.isStarted } // GetModuleStatus returns the current status of the module and all components func (m *AppInfoModule) GetModuleStatus() map[string]interface{} { - // Need read lock to ensure consistent snapshot of all 
component states - if !m.mutex.TryRLock() { - glog.Warning("[TryRLock] AppInfoModule.GetModuleStatus: Read lock not available, returning error status") - return map[string]interface{}{ - "error": "lock not available", - "status": "unknown", - } - } + m.mutex.RLock() defer m.mutex.RUnlock() status := map[string]interface{}{ diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index 047290e..83f8ad6 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -614,53 +614,29 @@ func (h *Hydrator) removeFromPendingList(userID, sourceID, appID, appName, appVe // GetMetrics returns hydrator metrics func (h *Hydrator) GetMetrics() HydratorMetrics { - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetMetrics, returning zero metrics") - // Try to get worker status even if we can't get task lock - var workers []*WorkerStatus - if h.workerStatusMutex.TryRLock() { - workers = h.getWorkerStatusList() - h.workerStatusMutex.RUnlock() - } - return HydratorMetrics{ - TotalTasksProcessed: h.totalTasksProcessed, - TotalTasksSucceeded: h.totalTasksSucceeded, - TotalTasksFailed: h.totalTasksFailed, - ActiveTasksCount: 0, - CompletedTasksCount: 0, - FailedTasksCount: 0, - QueueLength: int64(len(h.taskQueue)), - ActiveTasks: []*TaskInfo{}, - RecentCompletedTasks: h.getRecentCompletedTasks(), - RecentFailedTasks: h.getRecentFailedTasks(), - Workers: workers, - } - } - - // Get active tasks info + h.taskMutex.RLock() activeTasksList := make([]*TaskInfo, 0, len(h.activeTasks)) for _, task := range h.activeTasks { if task != nil { activeTasksList = append(activeTasksList, h.taskToTaskInfo(task)) } } - + activeCount := int64(len(h.activeTasks)) + completedCount := int64(len(h.completedTasks)) + failedCount := int64(len(h.failedTasks)) h.taskMutex.RUnlock() - // Get worker status - var workers []*WorkerStatus - if h.workerStatusMutex.TryRLock() { - workers = h.getWorkerStatusList() - 
h.workerStatusMutex.RUnlock() - } + h.workerStatusMutex.RLock() + workers := h.getWorkerStatusList() + h.workerStatusMutex.RUnlock() return HydratorMetrics{ TotalTasksProcessed: h.totalTasksProcessed, TotalTasksSucceeded: h.totalTasksSucceeded, TotalTasksFailed: h.totalTasksFailed, - ActiveTasksCount: int64(len(h.activeTasks)), - CompletedTasksCount: int64(len(h.completedTasks)), - FailedTasksCount: int64(len(h.failedTasks)), + ActiveTasksCount: activeCount, + CompletedTasksCount: completedCount, + FailedTasksCount: failedCount, QueueLength: int64(len(h.taskQueue)), ActiveTasks: activeTasksList, RecentCompletedTasks: h.getRecentCompletedTasks(), @@ -724,13 +700,9 @@ func (h *Hydrator) getWorkerStatusList() []*WorkerStatus { // getRecentCompletedTasks returns recent completed tasks (thread-safe) func (h *Hydrator) getRecentCompletedTasks() []*TaskHistoryEntry { - // Return a copy to avoid race conditions - if !h.workerStatusMutex.TryRLock() { - return make([]*TaskHistoryEntry, 0) - } + h.workerStatusMutex.RLock() defer h.workerStatusMutex.RUnlock() - // Return a copy result := make([]*TaskHistoryEntry, len(h.recentCompletedTasks)) copy(result, h.recentCompletedTasks) return result @@ -738,13 +710,9 @@ func (h *Hydrator) getRecentCompletedTasks() []*TaskHistoryEntry { // getRecentFailedTasks returns recent failed tasks (thread-safe) func (h *Hydrator) getRecentFailedTasks() []*TaskHistoryEntry { - // Return a copy to avoid race conditions - if !h.workerStatusMutex.TryRLock() { - return make([]*TaskHistoryEntry, 0) - } + h.workerStatusMutex.RLock() defer h.workerStatusMutex.RUnlock() - // Return a copy result := make([]*TaskHistoryEntry, len(h.recentFailedTasks)) copy(result, h.recentFailedTasks) return result @@ -821,10 +789,7 @@ func (h *Hydrator) databaseSyncMonitor(ctx context.Context) { // processCompletedTask processes a single completed task func (h *Hydrator) processCompletedTask(taskID string) { - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed 
to acquire read lock for processCompletedTask, skipping") - return - } + h.taskMutex.RLock() task, exists := h.completedTasks[taskID] h.taskMutex.RUnlock() @@ -839,10 +804,7 @@ func (h *Hydrator) processCompletedTask(taskID string) { // processBatchCompletions processes completed tasks in batches func (h *Hydrator) processBatchCompletions() { - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for processBatchCompletions, skipping") - return - } + h.taskMutex.RLock() currentCompleted := h.totalTasksSucceeded h.taskMutex.RUnlock() @@ -862,11 +824,7 @@ func (h *Hydrator) checkAndSyncToDatabase() { return } - // Check if there are completed tasks that need syncing - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for checkAndSyncToDatabase, skipping") - return - } + h.taskMutex.RLock() completedCount := len(h.completedTasks) h.taskMutex.RUnlock() @@ -904,10 +862,7 @@ func (h *Hydrator) triggerDatabaseSync() { // cleanupOldCompletedTasks removes old completed tasks from memory func (h *Hydrator) cleanupOldCompletedTasks() { - if !h.taskMutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for cleanupOldCompletedTasks, skipping") - return - } + h.taskMutex.Lock() defer h.taskMutex.Unlock() // Keep only the most recent 100 completed tasks @@ -944,10 +899,7 @@ func (h *Hydrator) monitorMemoryUsage() { h.lastMemoryCheck = time.Now() - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for monitorMemoryUsage, skipping") - return - } + h.taskMutex.RLock() activeCount := len(h.activeTasks) completedCount := len(h.completedTasks) failedCount := len(h.failedTasks) @@ -966,10 +918,7 @@ func (h *Hydrator) monitorMemoryUsage() { // cleanupOldTasks cleans up old tasks from all task maps func (h *Hydrator) cleanupOldTasks() { - if !h.taskMutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for cleanupOldTasks, skipping") - return - } + h.taskMutex.Lock() 
defer h.taskMutex.Unlock() now := time.Now() diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index f0b088a..fd5868a 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -78,19 +78,14 @@ func NewSyncer(cache *CacheData, syncInterval time.Duration, settingsManager *se // AddStep adds a step to the syncer func (s *Syncer) AddStep(step syncerfn.SyncStep) { - if !s.mutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for AddStep, skipping") - return - } + s.mutex.Lock() defer s.mutex.Unlock() s.steps = append(s.steps, step) } // RemoveStep removes a step by index func (s *Syncer) RemoveStep(index int) error { - if !s.mutex.TryLock() { - return fmt.Errorf("failed to acquire lock for RemoveStep") - } + s.mutex.Lock() defer s.mutex.Unlock() if index < 0 || index >= len(s.steps) { @@ -103,10 +98,7 @@ func (s *Syncer) RemoveStep(index int) error { // GetSteps returns a copy of all steps func (s *Syncer) GetSteps() []syncerfn.SyncStep { - if !s.mutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetSteps, returning empty slice") - return make([]syncerfn.SyncStep, 0) - } + s.mutex.RLock() defer s.mutex.RUnlock() steps := make([]syncerfn.SyncStep, len(s.steps)) @@ -122,9 +114,7 @@ func (s *Syncer) Start(ctx context.Context) error { // StartWithOptions starts the syncer with options. // If enableSyncLoop is false, the periodic sync loop is not started (Pipeline handles scheduling). 
func (s *Syncer) StartWithOptions(ctx context.Context, enableSyncLoop bool) error { - if !s.mutex.TryLock() { - return fmt.Errorf("failed to acquire lock for Start") - } + s.mutex.Lock() if s.isRunning.Load() { s.mutex.Unlock() return fmt.Errorf("syncer is already running") @@ -272,10 +262,7 @@ func (s *Syncer) hasSyncRelevantConfigChanged() (changed bool, reason string) { // Stop stops the synchronization process func (s *Syncer) Stop() { - if !s.mutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for Stop, skipping") - return - } + s.mutex.Lock() defer s.mutex.Unlock() if !s.isRunning.Load() { @@ -295,11 +282,9 @@ func (s *Syncer) IsRunning() bool { // syncLoop runs the main synchronization loop func (s *Syncer) syncLoop(ctx context.Context) { defer func() { - // Use TryLock for cleanup to avoid blocking - if s.mutex.TryLock() { - s.isRunning.Store(false) - s.mutex.Unlock() - } + s.mutex.Lock() + s.isRunning.Store(false) + s.mutex.Unlock() glog.V(4).Info("Syncer stopped") }() @@ -441,10 +426,7 @@ func (s *Syncer) executeSyncCycle(ctx context.Context) error { // updateSyncSuccess updates status after a successful sync func (s *Syncer) updateSyncSuccess(duration time.Duration, startTime time.Time) { - if !s.statusMutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for updateSyncSuccess, skipping status update") - return - } + s.statusMutex.Lock() defer s.statusMutex.Unlock() s.lastSyncSuccess.Store(time.Now()) @@ -459,10 +441,7 @@ func (s *Syncer) updateSyncSuccess(duration time.Duration, startTime time.Time) // updateSyncFailure updates status after a failed sync func (s *Syncer) updateSyncFailure(err error, startTime time.Time) { - if !s.statusMutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for updateSyncFailure, skipping status update") - return - } + s.statusMutex.Lock() defer s.statusMutex.Unlock() duration := time.Since(startTime) @@ -993,12 +972,9 @@ func DefaultSyncerConfig() SyncerConfig { // SetCacheManager 
sets the cache manager for hydration notifications func (s *Syncer) SetCacheManager(cacheManager *CacheManager) { - if !s.mutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for SetCacheManager, skipping") - return - } + s.mutex.Lock() defer s.mutex.Unlock() - s.cacheManager.Store(cacheManager) // Use atomic.Store to set the pointer + s.cacheManager.Store(cacheManager) } // SyncDetails contains detailed information about a sync operation From cd725317f419abed8d8f57798fd3c0c62e018808 Mon Sep 17 00:00:00 2001 From: aby913 Date: Fri, 6 Mar 2026 11:55:26 +0800 Subject: [PATCH 27/45] refactor: ClearAppRenderFailedData --- internal/v2/appinfo/cache.go | 90 ++++++++---------------------------- 1 file changed, 18 insertions(+), 72 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 2382019..ff57496 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -2455,103 +2455,49 @@ func (cm *CacheManager) cleanupWorker() { // ClearAppRenderFailedData clears all AppRenderFailed data for all users and sources func (cm *CacheManager) ClearAppRenderFailedData() { - glog.V(3).Info("INFO: Starting periodic cleanup of AppRenderFailed data") + glog.Info("INFO: Starting periodic cleanup of AppRenderFailed data") - start := time.Now() - // 1) Short lock phase: collect keys to be cleaned and count the number - type target struct{ userID, sourceID string } - targets := make([]target, 0, 128) - counts := make(map[target]int) - - glog.V(3).Info("INFO: [Cleanup] Attempting to acquire read lock for scan phase") cm.mutex.RLock() - scanLockAcquiredAt := time.Now() - glog.V(3).Info("INFO: [Cleanup] Read lock acquired (scan). 
Hold minimal time") - if cm.cache == nil { cm.mutex.RUnlock() - glog.V(4).Info("WARN: Cache is nil, skipping AppRenderFailed cleanup") return } + type target struct{ userID, sourceID string } + targets := make([]target, 0, 128) + for userID, userData := range cm.cache.Users { for sourceID, sourceData := range userData.Sources { - if n := len(sourceData.AppRenderFailed); n > 0 { - t := target{userID: userID, sourceID: sourceID} - targets = append(targets, t) - counts[t] = n + if len(sourceData.AppRenderFailed) > 0 { + targets = append(targets, target{userID: userID, sourceID: sourceID}) } } } - // 2) Release read lock after scan cm.mutex.RUnlock() - glog.V(3).Infof("INFO: [Cleanup] Released read lock after scan (held %v), targets=%d", time.Since(scanLockAcquiredAt), len(targets)) - - // 3) Processing phase: Use batch processing to avoid lock contention - totalCleared := 0 if len(targets) == 0 { - glog.V(3).Infof("DEBUG: No AppRenderFailed entries found during periodic cleanup (took %v)", time.Since(start)) return } - // Use single write lock to batch process all targets to avoid lock contention - glog.V(3).Infof("INFO: [Cleanup] Processing %d targets in batch mode", len(targets)) - - // Use short timeout to quickly acquire write lock to avoid writer starvation - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Millisecond) - defer cancel() - - // Use channel to implement non-blocking lock acquisition - lockAcquired := make(chan struct{}, 1) - lockFailed := make(chan struct{}, 1) - - // Start goroutine to attempt lock acquisition (only for very short time, give up immediately if not acquired) - go func() { - done := make(chan struct{}, 1) - go func() { - cm.mutex.Lock() - done <- struct{}{} - }() - select { - case <-done: - // Successfully acquired lock - lockAcquired <- struct{}{} - case <-ctx.Done(): - // Failed to acquire write lock quickly, give up immediately to avoid reader starvation - lockFailed <- struct{}{} - } - }() + start := time.Now() + 
cm.mutex.Lock() + defer cm.mutex.Unlock() - // Wait for lock acquisition result - select { - case <-lockAcquired: - // Successfully acquired lock, batch process all targets - defer cm.mutex.Unlock() - - for _, t := range targets { - if userData, ok := cm.cache.Users[t.userID]; ok { - if sourceData, ok2 := userData.Sources[t.sourceID]; ok2 { - originalCount := len(sourceData.AppRenderFailed) - if originalCount > 0 { - sourceData.AppRenderFailed = make([]*types.AppRenderFailedData, 0) - totalCleared += originalCount - glog.V(3).Infof("INFO: [Cleanup] Cleared %d AppRenderFailed entries for user=%s, source=%s", originalCount, t.userID, t.sourceID) - } + count := 0 + for _, t := range targets { + if userData, ok := cm.cache.Users[t.userID]; ok { + if sourceData, ok := userData.Sources[t.sourceID]; ok { + if len(sourceData.AppRenderFailed) > 0 { + count += len(sourceData.AppRenderFailed) + sourceData.AppRenderFailed = make([]*types.AppRenderFailedData, 0) } } } - - case <-lockFailed: - glog.Error("DEBUG: [Cleanup] Failed to acquire write lock quickly, skipping cleanup to avoid reader starvation") - return } - if totalCleared > 0 { - glog.V(2).Infof("INFO: Periodic cleanup completed, cleared %d total AppRenderFailed entries in %v", totalCleared, time.Since(start)) - } else { - glog.V(3).Infof("DEBUG: No AppRenderFailed entries found during periodic cleanup (took %v)", time.Since(start)) + if count > 0 { + glog.Infof("INFO: [Cleanup] Cleared %d AppRenderFailed entries in %v", count, time.Since(start)) } } From df83c40e36801b1dcaba3a1c3f14ba22d17d61c3 Mon Sep 17 00:00:00 2001 From: aby913 Date: Fri, 6 Mar 2026 11:56:03 +0800 Subject: [PATCH 28/45] fix: format system status allusers --- pkg/v2/api/system.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/v2/api/system.go b/pkg/v2/api/system.go index 8800931..61eca87 100644 --- a/pkg/v2/api/system.go +++ b/pkg/v2/api/system.go @@ -321,6 +321,9 @@ func (s *Server) getSystemStatus(w http.ResponseWriter, r 
*http.Request) { if userInfo == nil { userInfo = map[string]interface{}{} } + if allUsers == nil { + allUsers = make([]map[string]string, 0) + } if curUserResource == nil { curUserResource = map[string]interface{}{} } From 15ba600dea119c25f194e50565c5580637db37e8 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 6 Mar 2026 05:25:34 +0000 Subject: [PATCH 29/45] feat: restore retryable failed apps to pending queue each pipeline cycle Add RestoreRetryableFailedToPending to CacheManager: each Pipeline cycle, move up to 50 items from AppRenderFailed back to AppInfoLatestPending (FIFO) so the hydrator can retry them without waiting for the 5-minute GC cleanup. - Failed apps that timeout on dcr/sync-app (3s) are retried ~30s later instead of waiting 5+ minutes for ClearAppRenderFailedData - Items are atomically removed from Failed and added to Pending under cm.mutex.Lock, no concurrent modification risk - ClearAppRenderFailedData (5-min GC) remains as safety net for permanently failing apps - moveTaskToRenderFailed is unchanged: if retry fails again, the app goes back to Failed through the normal path Co-authored-by: aby913 --- internal/v2/appinfo/cache.go | 45 +++++++++++++++++++++++++++++++++ internal/v2/appinfo/pipeline.go | 2 ++ 2 files changed, 47 insertions(+) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index ff57496..a32a0a1 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -571,6 +571,51 @@ func (cm *CacheManager) CollectAllPendingItems() []PendingItem { return items } +// RestoreRetryableFailedToPending moves up to `limit` items from AppRenderFailed +// back to AppInfoLatestPending (FIFO order) so they can be retried by the hydrator. +// Items are removed from AppRenderFailed to avoid duplicates. +// Returns the number of items restored. 
+func (cm *CacheManager) RestoreRetryableFailedToPending(limit int) int { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + restored := 0 + for _, userData := range cm.cache.Users { + if restored >= limit { + break + } + for _, sourceData := range userData.Sources { + if restored >= limit { + break + } + i := 0 + for i < len(sourceData.AppRenderFailed) && restored < limit { + fd := sourceData.AppRenderFailed[i] + if fd == nil || fd.RawData == nil { + i++ + continue + } + sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, &types.AppInfoLatestPendingData{ + Type: types.AppInfoLatestPending, + Timestamp: fd.Timestamp, + Version: fd.Version, + RawData: fd.RawData, + RawPackage: fd.RawPackage, + Values: fd.Values, + AppInfo: fd.AppInfo, + RenderedPackage: fd.RenderedPackage, + }) + sourceData.AppRenderFailed = append(sourceData.AppRenderFailed[:i], sourceData.AppRenderFailed[i+1:]...) + restored++ + } + } + } + if restored > 0 { + glog.V(2).Infof("RestoreRetryableFailedToPending: restored %d failed apps to pending queue", restored) + } + return restored +} + // SnapshotSourcePending returns shallow copies of the pending and latest slices // for the given user/source, safe for iteration outside the lock. 
func (cm *CacheManager) SnapshotSourcePending(userID, sourceID string) ( diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 1ac91b1..4b0f5ae 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -172,6 +172,8 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { return affectedUsers } + p.cacheManager.RestoreRetryableFailedToPending(50) + items := p.cacheManager.CollectAllPendingItems() if len(items) == 0 { From da7efd08be0e94d3bd6201dd05e69bf24fe53201 Mon Sep 17 00:00:00 2001 From: aby913 Date: Fri, 6 Mar 2026 20:03:37 +0800 Subject: [PATCH 30/45] refactor: improve logs, remove TryLock --- internal/v2/appinfo/appinfomodule.go | 1 + internal/v2/appinfo/cache.go | 2 +- internal/v2/appinfo/hydration.go | 18 ++-- internal/v2/appinfo/pipeline.go | 3 +- .../v2/appinfo/syncerfn/detail_fetch_step.go | 24 +++--- internal/v2/runtime/store.go | 82 ++++--------------- 6 files changed, 40 insertions(+), 90 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index 859e710..75cbeef 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -1272,6 +1272,7 @@ func (m *AppInfoModule) SyncUserListToCache() error { } // RefreshUserDataStructures ensures all configured users have proper data structures +// not used func (m *AppInfoModule) RefreshUserDataStructures() error { // Check isStarted without lock since it's only read if !m.isStarted { diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index a32a0a1..7b67500 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -2500,7 +2500,7 @@ func (cm *CacheManager) cleanupWorker() { // ClearAppRenderFailedData clears all AppRenderFailed data for all users and sources func (cm *CacheManager) ClearAppRenderFailedData() { - glog.Info("INFO: Starting periodic cleanup of AppRenderFailed data") + glog.Info("INFO: [Cleanup] 
Starting periodic cleanup of AppRenderFailed data") cm.mutex.RLock() if cm.cache == nil { diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index 83f8ad6..b0796d0 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -228,41 +228,41 @@ func (h *Hydrator) isAppHydrationComplete(pendingData *types.AppInfoLatestPendin appName = pendingData.RawData.Name } - glog.V(3).Infof("DEBUG: isAppHydrationComplete checking appID=%s, name=%s, RawPackage=%s, RenderedPackage=%s", + glog.V(3).Infof("DEBUG: isAppHydrationComplete checking appID=%s(%s), RawPackage=%s, RenderedPackage=%s", appID, appName, pendingData.RawPackage, pendingData.RenderedPackage) if pendingData.RawPackage == "" { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - RawPackage is empty for appID=%s, name=%s", appID, appName) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - RawPackage is empty for appID=%s(%s)", appID, appName) return false } if pendingData.RenderedPackage == "" { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - RenderedPackage is empty for appID=%s, name=%s", appID, appName) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - RenderedPackage is empty for appID=%s(%s)", appID, appName) return false } if pendingData.AppInfo == nil { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - AppInfo is nil for appID=%s, name=%s", appID, appName) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - AppInfo is nil for appID=%s(%s)", appID, appName) return false } imageAnalysis := pendingData.AppInfo.ImageAnalysis if imageAnalysis == nil { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis is nil for appID=%s, name=%s", appID, appName) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis is nil for appID=%s(%s)", appID, appName) return false } if imageAnalysis.TotalImages > 0 { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING 
TRUE - TotalImages > 0 for appID=%s, name=%s, TotalImages: %d", appID, appName, imageAnalysis.TotalImages) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING TRUE - TotalImages > 0 for appID=%s(%s), TotalImages: %d", appID, appName, imageAnalysis.TotalImages) return true } if imageAnalysis.TotalImages == 0 && imageAnalysis.Images != nil { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING TRUE - TotalImages=0 but Images not nil for appID=%s, name=%s, Images: %v", appID, appName, imageAnalysis.Images) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING TRUE - TotalImages=0 but Images not nil for appID=%s(%s), Images: %v", appID, appName, imageAnalysis.Images) return true } - glog.V(2).Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis incomplete for appID=%s, name=%s, TotalImages: %d, Images: %v", appID, appName, imageAnalysis.TotalImages, imageAnalysis.Images) + glog.V(2).Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis incomplete for appID=%s(%s), TotalImages: %d, Images: %v", appID, appName, imageAnalysis.TotalImages, imageAnalysis.Images) return false } @@ -846,7 +846,7 @@ func (h *Hydrator) triggerDatabaseSync() { return } - glog.V(3).Infof("Triggering database synchronization") + glog.V(2).Infof("Triggering database synchronization") // Force sync all cache data to Redis/database if err := h.cacheManager.ForceSync(); err != nil { diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 4b0f5ae..bdafc29 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -172,7 +172,8 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { return affectedUsers } - p.cacheManager.RestoreRetryableFailedToPending(50) + count := p.cacheManager.RestoreRetryableFailedToPending(20) + glog.Infof("Pipeline Phase 2: restore %d Failed to Pending", count) items := p.cacheManager.CollectAllPendingItems() diff --git 
a/internal/v2/appinfo/syncerfn/detail_fetch_step.go b/internal/v2/appinfo/syncerfn/detail_fetch_step.go index 50be47b..d3b8425 100644 --- a/internal/v2/appinfo/syncerfn/detail_fetch_step.go +++ b/internal/v2/appinfo/syncerfn/detail_fetch_step.go @@ -487,8 +487,9 @@ func (d *DetailFetchStep) fetchAppsBatch(ctx context.Context, appIDs []string, d glog.V(3).Info("Mutex lock released successfully") // Now remove apps from cache after releasing the main lock to avoid nested locks + var source = data.GetMarketSource() for _, appToRemove := range appsToRemove { - d.removeAppFromCache(appToRemove.appID, appToRemove.appInfoMap, data) + d.removeAppFromCache(appToRemove.appID, appToRemove.appInfoMap, data, source) } // Count successful and failed apps @@ -554,7 +555,7 @@ func (d *DetailFetchStep) fetchAppsBatch(ctx context.Context, appIDs []string, d } // removeAppFromCache removes an app from cache for all users -func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string]interface{}, data *SyncContext) { +func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string]interface{}, data *SyncContext, source *settings.MarketSource) { appName, ok := appInfoMap["name"].(string) if !ok || appName == "" { glog.V(3).Infof("Warning: Cannot remove app from cache - app name is empty for app: %s", appID) @@ -570,14 +571,6 @@ func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string glog.V(3).Infof("Starting to remove app %s %s from cache", appID, appName) - // Get app name for matching - when an app is suspended, remove ALL versions of that app - - // Get source ID from market source - source := data.GetMarketSource() - if source == nil { - glog.V(3).Infof("Warning: MarketSource is nil, cannot remove app %s %s from cache", appID, appName) - return - } // IMPORTANT: use MarketSource.ID as the key for Sources map (not Name) sourceID := source.ID glog.V(2).Infof("Removing all versions of app %s(%s) from cache for source: %s 
[SUSPEND/REMOVE]", appID, appName, sourceID) @@ -599,11 +592,14 @@ func (d *DetailFetchStep) cleanupSuspendedAppsFromLatestData(data *SyncContext) } sourceID := "" - if marketSource := data.GetMarketSource(); marketSource != nil { - // IMPORTANT: use MarketSource.ID as the key for Sources map (not Name) - sourceID = marketSource.ID + marketSource := data.GetMarketSource() + if marketSource == nil { + glog.Error("[DetailFetchStep] MarketSource not found") + return } + sourceID = marketSource.ID + // Collect apps to remove appsToRemove := make([]struct { appID string @@ -688,7 +684,7 @@ func (d *DetailFetchStep) cleanupSuspendedAppsFromLatestData(data *SyncContext) } } if appInfoMapForRemoval != nil { - d.removeAppFromCache(appIDForRemoval, appInfoMapForRemoval, data) + d.removeAppFromCache(appIDForRemoval, appInfoMapForRemoval, data, marketSource) } } } diff --git a/internal/v2/runtime/store.go b/internal/v2/runtime/store.go index e0687d5..e913070 100644 --- a/internal/v2/runtime/store.go +++ b/internal/v2/runtime/store.go @@ -4,8 +4,6 @@ import ( "fmt" "sync" "time" - - "github.com/golang/glog" ) // StateStore manages the current runtime state in memory @@ -30,11 +28,8 @@ func NewStateStore() *StateStore { // UpdateAppState updates or creates an app flow state func (s *StateStore) UpdateAppState(state *AppFlowState) { - if !s.mu.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for UpdateAppState, skipping update for app: %s", state.AppName) - return - } - defer s.mu.Unlock() + // s.mu.Lock() + // defer s.mu.Unlock() key := s.getAppStateKey(state.UserID, state.SourceID, state.AppName) state.LastUpdate = time.Now() @@ -44,10 +39,7 @@ func (s *StateStore) UpdateAppState(state *AppFlowState) { // GetAppState retrieves an app flow state func (s *StateStore) GetAppState(userID, sourceID, appName string) (*AppFlowState, bool) { - if !s.mu.TryRLock() { - glog.Warningf("[TryRLock] Failed to acquire read lock for GetAppState, returning empty for app: %s", 
appName) - return nil, false - } + s.mu.RLock() defer s.mu.RUnlock() key := s.getAppStateKey(userID, sourceID, appName) @@ -57,10 +49,7 @@ func (s *StateStore) GetAppState(userID, sourceID, appName string) (*AppFlowStat // GetAllAppStates returns all app states func (s *StateStore) GetAllAppStates() map[string]*AppFlowState { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetAllAppStates, returning empty map") - return make(map[string]*AppFlowState) - } + s.mu.RLock() defer s.mu.RUnlock() result := make(map[string]*AppFlowState) @@ -72,10 +61,7 @@ func (s *StateStore) GetAllAppStates() map[string]*AppFlowState { // UpdateTask updates or creates a task state func (s *StateStore) UpdateTask(task *TaskState) { - if !s.mu.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for UpdateTask, skipping update for task: %s, opId: %s, app: %s", task.TaskID, task.OpID, task.AppName) - return - } + s.mu.Lock() defer s.mu.Unlock() s.tasks[task.TaskID] = task @@ -84,10 +70,7 @@ func (s *StateStore) UpdateTask(task *TaskState) { // GetTask retrieves a task state func (s *StateStore) GetTask(taskID string) (*TaskState, bool) { - if !s.mu.TryRLock() { - glog.Warningf("[TryRLock] Failed to acquire read lock for GetTask, returning empty for task: %s", taskID) - return nil, false - } + s.mu.RLock() defer s.mu.RUnlock() task, ok := s.tasks[taskID] @@ -96,10 +79,7 @@ func (s *StateStore) GetTask(taskID string) (*TaskState, bool) { // GetAllTasks returns all tasks func (s *StateStore) GetAllTasks() map[string]*TaskState { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetAllTasks, returning empty map") - return make(map[string]*TaskState) - } + s.mu.RLock() defer s.mu.RUnlock() result := make(map[string]*TaskState) @@ -111,10 +91,7 @@ func (s *StateStore) GetAllTasks() map[string]*TaskState { // RemoveTask removes a completed/failed/canceled task after some time func (s *StateStore) RemoveTask(taskID string) 
{ - if !s.mu.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for RemoveTask, skipping removal for task: %s", taskID) - return - } + s.mu.Lock() defer s.mu.Unlock() delete(s.tasks, taskID) @@ -123,10 +100,7 @@ func (s *StateStore) RemoveTask(taskID string) { // UpdateComponent updates or creates a component status func (s *StateStore) UpdateComponent(component *ComponentStatus) { - if !s.mu.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for UpdateComponent, skipping update for component: %s", component.Name) - return - } + s.mu.Lock() defer s.mu.Unlock() component.LastCheck = time.Now() @@ -136,10 +110,7 @@ func (s *StateStore) UpdateComponent(component *ComponentStatus) { // GetComponent retrieves a component status func (s *StateStore) GetComponent(name string) (*ComponentStatus, bool) { - if !s.mu.TryRLock() { - glog.Warningf("[TryRLock] Failed to acquire read lock for GetComponent, returning empty for component: %s", name) - return nil, false - } + s.mu.RLock() defer s.mu.RUnlock() component, ok := s.components[name] @@ -148,10 +119,7 @@ func (s *StateStore) GetComponent(name string) (*ComponentStatus, bool) { // GetAllComponents returns all component statuses func (s *StateStore) GetAllComponents() map[string]*ComponentStatus { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetAllComponents, returning empty map") - return make(map[string]*ComponentStatus) - } + s.mu.RLock() defer s.mu.RUnlock() result := make(map[string]*ComponentStatus) @@ -163,16 +131,7 @@ func (s *StateStore) GetAllComponents() map[string]*ComponentStatus { // GetSnapshot creates a complete snapshot of current state func (s *StateStore) GetSnapshot() *RuntimeSnapshot { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetSnapshot, returning empty snapshot") - return &RuntimeSnapshot{ - Timestamp: time.Now(), - AppStates: make(map[string]*AppFlowState), - Tasks: make(map[string]*TaskState), - 
Components: make(map[string]*ComponentStatus), - Summary: &RuntimeSummary{}, - } - } + s.mu.RLock() defer s.mu.RUnlock() snapshot := &RuntimeSnapshot{ @@ -254,10 +213,7 @@ func (s *StateStore) getAppStateKey(userID, sourceID, appName string) string { // UpdateChartRepoStatus updates chart repo status func (s *StateStore) UpdateChartRepoStatus(status *ChartRepoStatus) { - if !s.mu.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for UpdateChartRepoStatus, skipping update") - return - } + s.mu.Lock() defer s.mu.Unlock() if status != nil { @@ -269,20 +225,16 @@ func (s *StateStore) UpdateChartRepoStatus(status *ChartRepoStatus) { // GetChartRepoStatus retrieves chart repo status func (s *StateStore) GetChartRepoStatus() *ChartRepoStatus { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetChartRepoStatus, returning nil") - return nil - } + s.mu.RLock() defer s.mu.RUnlock() + return s.chartRepo } // GetLastUpdate returns the last update time func (s *StateStore) GetLastUpdate() time.Time { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetLastUpdate, returning zero time") - return time.Time{} - } + s.mu.RLock() defer s.mu.RUnlock() + return s.lastUpdate } From 058e5e143d99817d0bb1473a288ca6fbaa3e5177 Mon Sep 17 00:00:00 2001 From: aby913 Date: Mon, 9 Mar 2026 15:01:39 +0800 Subject: [PATCH 31/45] fix: adjust app operator logs --- internal/v2/appinfo/appinfomodule.go | 8 ++--- internal/v2/appinfo/datasender_app.go | 4 +-- internal/v2/appinfo/datawatcher_app.go | 27 ++++++++------- internal/v2/appinfo/datawatcher_repo.go | 34 +++++++++---------- internal/v2/appinfo/datawatcher_user.go | 4 +-- internal/v2/appinfo/diagnostic.go | 4 +-- internal/v2/appinfo/hydration.go | 2 +- internal/v2/appinfo/pipeline.go | 22 ++++++------ .../v2/appinfo/status_correction_check.go | 34 ++++++++++--------- internal/v2/appinfo/syncer.go | 6 ++-- internal/v2/task/app_cancel.go | 4 +-- 
internal/v2/task/app_clone.go | 2 +- internal/v2/task/app_install.go | 4 +-- internal/v2/task/app_uninstall.go | 4 +-- internal/v2/task/app_upgrade.go | 4 +-- pkg/v2/api/task.go | 20 +++++------ 16 files changed, 94 insertions(+), 89 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index 75cbeef..ee23a08 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -762,7 +762,7 @@ func (m *AppInfoModule) correctCacheWithChartRepo() error { // Build the set of delisted app IDs (apps NOT in validApps) delistedAppIDs := make(map[string]bool) - allUsersData := m.cacheManager.GetAllUsersData() + allUsersData := m.cacheManager.GetAllUsersData() // ~ correctCacheWithChartRepo for _, userData := range allUsersData { for sourceID, sourceData := range userData.Sources { for _, app := range sourceData.AppInfoLatest { @@ -1110,7 +1110,7 @@ func (m *AppInfoModule) SetAppData(userID, sourceID string, dataType AppDataType if !m.isStarted || m.cacheManager == nil { return fmt.Errorf("module is not started or cache manager is not available") } - return m.cacheManager.SetAppData(userID, sourceID, dataType, data) + return m.cacheManager.SetAppData(userID, sourceID, dataType, data, "AppInfoModule") } // GetAppData is a convenience function to get app data @@ -1318,7 +1318,7 @@ func (m *AppInfoModule) GetCachedUsers() []string { return []string{} } - allUsersData := m.cacheManager.GetAllUsersData() + allUsersData := m.cacheManager.GetAllUsersData() // not used users := make([]string, 0, len(allUsersData)) for userID := range allUsersData { users = append(users, userID) @@ -1357,7 +1357,7 @@ func (m *AppInfoModule) GetInvalidDataReport() map[string]interface{} { }, } - allUsersForReport := m.cacheManager.GetAllUsersData() + allUsersForReport := m.cacheManager.GetAllUsersData() // not used totalUsers := 0 totalSources := 0 diff --git a/internal/v2/appinfo/datasender_app.go 
b/internal/v2/appinfo/datasender_app.go index 0267b44..b17761e 100644 --- a/internal/v2/appinfo/datasender_app.go +++ b/internal/v2/appinfo/datasender_app.go @@ -115,8 +115,8 @@ func (ds *DataSender) SendAppInfoUpdate(update types.AppInfoUpdate) error { subject := fmt.Sprintf("%s.%s", ds.subject, update.User) // Log before sending - if len(string(data)) > 500 { - glog.V(2).Infof("App - Sending app info update to NATS subject '%s': %s", subject, string(data)[:500]) + if len(string(data)) > 800 { + glog.V(2).Infof("App - Sending app info update to NATS subject '%s': %s", subject, string(data)[:800]) } else { glog.V(2).Infof("App - Sending app info update to NATS subject '%s': %s", subject, string(data)) } diff --git a/internal/v2/appinfo/datawatcher_app.go b/internal/v2/appinfo/datawatcher_app.go index 9a16292..b3f253f 100644 --- a/internal/v2/appinfo/datawatcher_app.go +++ b/internal/v2/appinfo/datawatcher_app.go @@ -103,6 +103,7 @@ func (dw *DataWatcher) IsRunning() bool { } // watchLoop is the main monitoring loop +// ~ not used func (dw *DataWatcher) watchLoop(ctx context.Context) { glog.Infof("DataWatcher monitoring loop started") defer glog.Infof("DataWatcher monitoring loop stopped") @@ -111,7 +112,7 @@ func (dw *DataWatcher) watchLoop(ctx context.Context) { defer ticker.Stop() // Run once immediately - dw.processCompletedApps() + dw.processCompletedApps() // not used for { select { @@ -122,7 +123,7 @@ func (dw *DataWatcher) watchLoop(ctx context.Context) { glog.Infof("DataWatcher stopped due to explicit stop") return case <-ticker.C: - dw.processCompletedApps() + dw.processCompletedApps() // not used } } } @@ -148,7 +149,7 @@ func (dw *DataWatcher) processCompletedApps() { // Get all users data from cache manager with timeout var allUsersData map[string]*types.UserData - allUsersData = dw.cacheManager.GetAllUsersData() + allUsersData = dw.cacheManager.GetAllUsersData() // not used if len(allUsersData) == 0 { glog.Infof("DataWatcher: No users data found, 
processing cycle completed") @@ -172,7 +173,7 @@ func (dw *DataWatcher) processCompletedApps() { // Process batch when it's full or we've reached the end if len(userBatch) >= batchSize || userCount == len(allUsersData) { - batchProcessed, batchMoved := dw.processUserBatch(ctx, userBatch, userDataBatch) + batchProcessed, batchMoved := dw.processUserBatch(ctx, userBatch, userDataBatch) // not used totalProcessed += batchProcessed totalMoved += batchMoved @@ -224,7 +225,7 @@ func (dw *DataWatcher) processUserBatch(ctx context.Context, userIDs []string, u } glog.V(3).Infof("DataWatcher: Processing user %d/%d in batch: %s", i+1, len(userIDs), userID) - processed, moved := dw.processUserData(userID, userData) + processed, moved := dw.processUserData(userID, userData) // not used totalProcessed += processed totalMoved += moved glog.V(2).Infof("DataWatcher: User %s completed: %d processed, %d moved", userID, processed, moved) @@ -234,6 +235,7 @@ func (dw *DataWatcher) processUserBatch(ctx context.Context, userIDs []string, u } // processUserData processes a single user's data +// ~ not used func (dw *DataWatcher) processUserData(userID string, userData *types.UserData) (int64, int64) { if userData == nil { return 0, 0 @@ -250,7 +252,7 @@ func (dw *DataWatcher) processUserData(userID string, userData *types.UserData) totalMoved := int64(0) for sourceID, sourceData := range sourceRefs { - processed, moved := dw.processSourceData(userID, sourceID, sourceData) + processed, moved := dw.processSourceData(userID, sourceID, sourceData) // not used totalProcessed += processed totalMoved += moved } @@ -388,6 +390,7 @@ func (dw *DataWatcher) calculateAndSetUserHashAsync(userID string, userData *typ } // processSourceData processes a single source's data for completed hydration +// ~ not used func (dw *DataWatcher) processSourceData(userID, sourceID string, sourceData *types.SourceData) (int64, int64) { if sourceData == nil { return 0, 0 @@ -455,12 +458,12 @@ func (dw *DataWatcher) 
processSourceData(userID, sourceID string, sourceData *ty newVersion = latestData.AppInfo.AppEntry.Version } if oldVersion != newVersion { - dw.sendNewAppReadyNotification(userID, completedApp, sourceID) + dw.sendNewAppReadyNotification(userID, completedApp, sourceID) // ~ not used } glog.V(3).Infof("DataWatcher: Replaced existing app: %s", appName) } else { glog.V(2).Infof("DataWatcher: Added new app to latest: %s", appName) - dw.sendNewAppReadyNotification(userID, completedApp, sourceID) + dw.sendNewAppReadyNotification(userID, completedApp, sourceID) // ~ not used } movedCount++ } @@ -914,7 +917,7 @@ func (dw *DataWatcher) ForceCalculateAllUsersHash() error { glog.V(3).Infof("DataWatcher: Force calculating hash for all users") // Get all users data - allUsersData := dw.cacheManager.GetAllUsersData() + allUsersData := dw.cacheManager.GetAllUsersData() // not used if len(allUsersData) == 0 { return fmt.Errorf("no users found in cache") } @@ -1044,7 +1047,7 @@ func (dw *DataWatcher) ProcessSingleAppToLatest(userID, sourceID string, pending appID := dw.getAppID(pendingApp) appName := dw.getAppName(pendingApp) - glog.V(2).Infof("Pipeline: ProcessSingleAppToLatest user=%s, source=%s, id=%s, name=%s", userID, sourceID, appID, appName) + glog.V(2).Infof("Pipeline Phase 2: ProcessSingleAppToLatest user=%s, source=%s, id=%s, name=%s", userID, sourceID, appID, appName) oldVersion, replaced, ok := dw.cacheManager.UpsertLatestAndRemovePending(userID, sourceID, latestData, appID, appName) if !ok { @@ -1057,12 +1060,12 @@ func (dw *DataWatcher) ProcessSingleAppToLatest(userID, sourceID string, pending newVersion = latestData.AppInfo.AppEntry.Version } if oldVersion != newVersion { - dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) + dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) // ~ ProcesSingleAppToLatest } glog.V(2).Infof("ProcessSingleAppToLatest: replaced existing app %s (user=%s, source=%s)", appName, userID, sourceID) } else { 
glog.V(2).Infof("ProcessSingleAppToLatest: added new app %s (user=%s, source=%s)", appName, userID, sourceID) - dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) + dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) // ~ ProcesSingleAppToLatest } atomic.AddInt64(&dw.totalAppsMoved, 1) diff --git a/internal/v2/appinfo/datawatcher_repo.go b/internal/v2/appinfo/datawatcher_repo.go index 11fd47b..2f9c8de 100644 --- a/internal/v2/appinfo/datawatcher_repo.go +++ b/internal/v2/appinfo/datawatcher_repo.go @@ -138,7 +138,7 @@ func (dwr *DataWatcherRepo) Start() error { glog.V(3).Info("Starting data watcher with 2-minute intervals") // Start the monitoring goroutine - go dwr.monitorStateChanges() + go dwr.monitorStateChanges() // not used return nil } @@ -202,14 +202,14 @@ func (dwr *DataWatcherRepo) monitorStateChanges() { glog.V(3).Info("State change monitoring started") // Process immediately on start - if err := dwr.processStateChanges(); err != nil { + if err := dwr.processStateChanges(); err != nil { // not used glog.Errorf("Error processing state changes on startup: %v", err) } for { select { case <-dwr.ticker.C: - if err := dwr.processStateChanges(); err != nil { + if err := dwr.processStateChanges(); err != nil { // not used glog.Errorf("Error processing state changes: %v", err) } case <-dwr.stopChannel: @@ -221,7 +221,7 @@ func (dwr *DataWatcherRepo) monitorStateChanges() { // processStateChanges fetches and processes new state changes func (dwr *DataWatcherRepo) processStateChanges() map[string]bool { - glog.V(3).Infof("Processing state changes after ID: %d", dwr.lastProcessedID) + glog.V(2).Infof("Processing state changes after ID: %d", dwr.lastProcessedID) affectedUsers := make(map[string]bool) stateChanges, err := dwr.fetchStateChanges(dwr.lastProcessedID) @@ -231,7 +231,7 @@ func (dwr *DataWatcherRepo) processStateChanges() map[string]bool { } if len(stateChanges) == 0 { - glog.V(3).Info("No new state changes found") + glog.V(2).Info("No 
new state changes found") return affectedUsers } @@ -250,17 +250,17 @@ func (dwr *DataWatcherRepo) processStateChanges() map[string]bool { continue } - // Track affected users from each change type - if change.AppData != nil && change.AppData.UserID != "" { - affectedUsers[change.AppData.UserID] = true - } - if change.Type == "image_info_updated" { - // Image updates affect all users - allUsers := dwr.cacheManager.GetAllUsersData() - for userID := range allUsers { - affectedUsers[userID] = true - } - } + // // Track affected users from each change type + // if change.AppData != nil && change.AppData.UserID != "" { + // affectedUsers[change.AppData.UserID] = true + // } + // if change.Type == "image_info_updated" { + // // Image updates affect all users + // allUsers := dwr.cacheManager.GetAllUsersData() + // for userID := range allUsers { + // affectedUsers[userID] = true + // } + // } lastProcessedID = change.ID } @@ -349,7 +349,7 @@ func (dwr *DataWatcherRepo) handleAppUploadCompleted(change *StateChange) error shouldUpdate := dwr.shouldUpdateAppInCache(change.AppData.UserID, change.AppData.Source, change.AppData.AppName, appInfo) if !shouldUpdate { - glog.V(3).Infof("App %s already exists in cache with same or newer version for user %s, source %s", + glog.V(2).Infof("App %s already exists in cache with same or newer version for user %s, source %s", change.AppData.AppName, change.AppData.UserID, change.AppData.Source) return nil } diff --git a/internal/v2/appinfo/datawatcher_user.go b/internal/v2/appinfo/datawatcher_user.go index 61f1b3c..fc40aaa 100644 --- a/internal/v2/appinfo/datawatcher_user.go +++ b/internal/v2/appinfo/datawatcher_user.go @@ -177,7 +177,7 @@ func (dw *DataWatcherUser) subscribeToMessages() error { // processMessage processes incoming NATS messages func (dw *DataWatcherUser) processMessage(data []byte) { - glog.V(2).Infof("User - Received message from NATS subject %s: %s", string(data)) + glog.V(2).Infof("User - Received message from NATS 
subject %s", string(data)) var message UserStateMessage if err := json.Unmarshal(data, &message); err != nil { @@ -186,7 +186,7 @@ func (dw *DataWatcherUser) processMessage(data []byte) { } // Print the received message - glog.V(3).Infof("Received app state message - EventType: %s, Username: %s, Timestamp: %s", + glog.V(2).Infof("User - Received watcher user message - EventType: %s, Username: %s, Timestamp: %s", message.EventType, message.Username, message.Timestamp) // Write to history diff --git a/internal/v2/appinfo/diagnostic.go b/internal/v2/appinfo/diagnostic.go index 6cbae2c..66f0000 100644 --- a/internal/v2/appinfo/diagnostic.go +++ b/internal/v2/appinfo/diagnostic.go @@ -111,8 +111,8 @@ func (cm *CacheManager) GetDiagnosticJSON() (string, error) { } // Get cache stats and users data for JSON response - cacheStats := cm.GetCacheStats() - allUsersData := cm.GetAllUsersData() + cacheStats := cm.GetCacheStats() // not used + allUsersData := cm.GetAllUsersData() // not used diagnosticInfo := map[string]interface{}{ "cache_stats": cacheStats, diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index b0796d0..9be6108 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -590,7 +590,7 @@ func (h *Hydrator) moveTaskToRenderFailed(task *hydrationfn.HydrationTask, failu // Add to render failed list in cache if err := h.cacheManager.SetAppData(task.UserID, task.SourceID, types.AppRenderFailed, map[string]interface{}{ "failed_app": failedData, - }); err != nil { + }, "Hydrator"); err != nil { glog.Errorf("Failed to add task to render failed list: %s, error: %v", task.ID, err) return } diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index bdafc29..3b8e50a 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -172,8 +172,8 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { return affectedUsers } - count := 
p.cacheManager.RestoreRetryableFailedToPending(20) - glog.Infof("Pipeline Phase 2: restore %d Failed to Pending", count) + // count := p.cacheManager.RestoreRetryableFailedToPending(20) + // glog.Infof("Pipeline Phase 2: restore %d Failed to Pending", count) items := p.cacheManager.CollectAllPendingItems() @@ -281,7 +281,7 @@ func (p *Pipeline) phaseDataWatcherRepo(ctx context.Context) map[string]bool { return nil default: } - glog.V(3).Info("Pipeline Phase 3: DataWatcherRepo") + glog.V(2).Info("Pipeline Phase 3: DataWatcherRepo") return p.dataWatcherRepo.ProcessOnce() } @@ -297,7 +297,7 @@ func (p *Pipeline) phaseStatusCorrection(ctx context.Context) map[string]bool { return nil default: } - glog.V(3).Info("Pipeline Phase 4: StatusCorrectionChecker") + glog.V(2).Info("Pipeline Phase 4: StatusCorrectionChecker") return p.statusCorrectionChecker.PerformStatusCheckOnce() } @@ -315,7 +315,7 @@ func (p *Pipeline) phaseHashAndSync(affectedUsers map[string]bool) { } if p.cacheManager != nil { if err := p.cacheManager.ForceSync(); err != nil { - glog.Warningf("Pipeline: ForceSync rate limited: %v", err) + glog.Errorf("Pipeline Phase 5: ForceSync rate limited: %v", err) } } } @@ -348,7 +348,7 @@ func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string } if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - glog.V(2).Infof("HydrateSingleApp: skipping %s %s (user=%s, source=%s) - in render failed list, will retry after cleanup", + glog.V(2).Infof("HydrateSingleApp: skipping %s(%s) (user=%s, source=%s) - in render failed list, will retry after cleanup", appID, appName, userID, sourceID) return false } @@ -362,14 +362,14 @@ func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string version = pendingData.RawData.Version } if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { - glog.V(2).Infof("HydrateSingleApp: skipping %s %s (user=%s, source=%s) - already in latest queue with version %s", + 
glog.V(2).Infof("HydrateSingleApp: skipping %s(%s) (user=%s, source=%s) - already in latest queue with version %s", appID, appName, userID, sourceID, version) return false } appDataMap := h.convertApplicationInfoEntryToMap(pendingData.RawData) if len(appDataMap) == 0 { - glog.V(2).Infof("HydrateSingleApp: skipping %s %s (user=%s, source=%s) - convertApplicationInfoEntryToMap returned empty", + glog.V(2).Infof("HydrateSingleApp: skipping %s(%s) (user=%s, source=%s) - convertApplicationInfoEntryToMap returned empty", appID, appName, userID, sourceID) return false } @@ -394,7 +394,7 @@ func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string if err := step.Execute(ctx, task); err != nil { failureReason := err.Error() failureStep := step.GetStepName() - glog.Errorf("HydrateSingleApp: step %s failed for app %s %s: %v", failureStep, appID, appName, err) + glog.Errorf("HydrateSingleApp: step %s failed for app %s(%s): %v", failureStep, appID, appName, err) h.moveTaskToRenderFailed(task, failureReason, failureStep) duration := time.Since(taskStartTime) h.markTaskFailed(task, taskStartTime, duration, failureStep, failureReason) @@ -404,13 +404,13 @@ func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string } if !h.isAppHydrationComplete(pendingData) { - glog.Warningf("HydrateSingleApp: steps completed but data incomplete for app %s %s, will retry next cycle", appID, appName) + glog.Warningf("HydrateSingleApp: steps completed but data incomplete for app %s(%s), will retry next cycle", appID, appName) return false } task.SetStatus(hydrationfn.TaskStatusCompleted) duration := time.Since(taskStartTime) h.markTaskCompleted(task, taskStartTime, duration) - glog.V(2).Infof("HydrateSingleApp: completed for app %s %s in %v", appID, appName, duration) + glog.V(2).Infof("HydrateSingleApp: completed for app %s(%s) in %v", appID, appName, duration) return true } diff --git a/internal/v2/appinfo/status_correction_check.go 
b/internal/v2/appinfo/status_correction_check.go index 285ff87..077213a 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -102,7 +102,7 @@ func (scc *StatusCorrectionChecker) Start() error { glog.Infof("Middleware service endpoint: http://%s:%s/app-service/v1/middlewares/status", scc.appServiceHost, scc.appServicePort) // Start the periodic checking goroutine - go scc.runPeriodicCheck() + go scc.runPeriodicCheck() // not used return nil } @@ -120,7 +120,7 @@ func (scc *StatusCorrectionChecker) StartWithOptions(enablePeriodicCheck bool) e if enablePeriodicCheck { glog.Infof("Starting status correction checker with interval: %v", scc.checkInterval) - go scc.runPeriodicCheck() + go scc.runPeriodicCheck() // not use } else { glog.Infof("Starting status correction checker in passive mode (serial pipeline handles processing)") } @@ -134,7 +134,7 @@ func (scc *StatusCorrectionChecker) PerformStatusCheckOnce() map[string]bool { if !scc.isRunning { return nil } - return scc.performStatusCheck() + return scc.performStatusCheck() // pipeline start } // Stop stops the periodic status checking @@ -187,12 +187,12 @@ func (scc *StatusCorrectionChecker) runPeriodicCheck() { glog.Infof("Status correction checker periodic loop started") // Perform initial check immediately - scc.performStatusCheck() + scc.performStatusCheck() // not use for { select { case <-ticker.C: - scc.performStatusCheck() + scc.performStatusCheck() // not use case <-scc.stopChan: glog.Infof("Status correction checker periodic loop stopped") return @@ -245,7 +245,7 @@ func (scc *StatusCorrectionChecker) performStatusCheck() map[string]bool { for userID, cs := range changesByUser { userData := scc.cacheManager.GetUserData(userID) if userData == nil { - glog.V(3).Infof("StatusCorrectionChecker: userData not found for user %s", userID) + glog.Warningf("StatusCorrectionChecker: userData not found for user %s", userID) continue } @@ -293,6 +293,8 @@ 
func (scc *StatusCorrectionChecker) fetchLatestStatus() ([]utils.AppServiceRespo return appsStatus, nil } + // glog.Infof("[SCC] appStatus: %s, middlewareStatus: %s", utils.ParseJson(appsStatus), utils.ParseJson(middlewaresStatus)) + // Combine apps and middlewares status // Convert middlewares to AppServiceResponse format and merge with apps allStatus := make([]utils.AppServiceResponse, 0, len(appsStatus)+len(middlewaresStatus)) @@ -899,13 +901,13 @@ func (scc *StatusCorrectionChecker) applyCorrections(changes []StatusChange, lat } } - appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) + appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) // app_appeared if appStateData == nil { glog.V(3).Infof("Failed to create app state data for appeared app %s (user: %s)", change.AppName, change.UserID) continue } - stateData := scc.createStateDataFromAppStateData(appStateData) - if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData); err != nil { + stateData := scc.createStateDataFromAppStateData(appStateData) // app_appeared + if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData, "SCC_app_appeared"); err != nil { glog.Errorf("Failed to add appeared app %s to cache (user: %s, source: %s): %v", change.AppName, change.UserID, sourceID, err) } else { @@ -941,13 +943,13 @@ func (scc *StatusCorrectionChecker) applyCorrections(changes []StatusChange, lat } } - appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) + appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) // state_change if appStateData == nil { glog.V(3).Infof("Failed to create app state data for app %s (user: %s)", change.AppName, change.UserID) continue } - stateData := scc.createStateDataFromAppStateData(appStateData) - if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, 
stateData); err != nil { + stateData := scc.createStateDataFromAppStateData(appStateData) // state_change + if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData, "SCC_state_change"); err != nil { glog.Errorf("Failed to update cache with corrected status for app %s (user: %s, source: %s): %v", change.AppName, change.UserID, sourceID, err) } else { @@ -997,14 +999,14 @@ func (scc *StatusCorrectionChecker) applyCorrections(changes []StatusChange, lat } } - appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) + appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) // state_inconsistency if appStateData == nil { glog.V(3).Infof("Failed to create app state data for app %s (user: %s)", change.AppName, change.UserID) continue } appStateData.Status.State = "running" - stateData := scc.createStateDataFromAppStateData(appStateData) - if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData); err != nil { + stateData := scc.createStateDataFromAppStateData(appStateData) // state_inconsistency + if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData, "SCC_state_inconsistency"); err != nil { glog.Errorf("Failed to update cache with corrected state for inconsistent app %s (user: %s, source: %s): %v", change.AppName, change.UserID, sourceID, err) } else { @@ -1265,7 +1267,7 @@ func (scc *StatusCorrectionChecker) ForceCheck() error { } glog.Infof("Forcing immediate status check") - scc.performStatusCheck() + scc.performStatusCheck() // not used return nil } diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index fd5868a..a0e0686 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -124,7 +124,7 @@ func (s *Syncer) StartWithOptions(ctx context.Context, enableSyncLoop bool) erro if enableSyncLoop { glog.V(2).Infof("Starting syncer with %d steps, sync interval: 
%v", len(s.steps), s.syncInterval) - go s.syncLoop(ctx) + go s.syncLoop(ctx) // not use } else { glog.V(2).Infof("Starting syncer with %d steps (passive mode, Pipeline handles scheduling)", len(s.steps)) } @@ -298,7 +298,7 @@ func (s *Syncer) syncLoop(ctx context.Context) { return default: // Execute sync cycle - if err := s.executeSyncCycle(ctx); err != nil { + if err := s.executeSyncCycle(ctx); err != nil { // not use glog.Errorf("Sync cycle failed: %v", err) } @@ -913,7 +913,7 @@ func (s *Syncer) storeDataViaCacheManager(userIDs []string, sourceID string, com // Use CacheManager.SetAppData to trigger hydration notifications if available if cacheManager := s.cacheManager.Load(); cacheManager != nil { glog.V(3).Infof("Using CacheManager to store data for user: %s, source: %s", userID, sourceID) - err := cacheManager.SetAppData(userID, sourceID, AppInfoLatestPending, completeData) + err := cacheManager.SetAppData(userID, sourceID, AppInfoLatestPending, completeData, "Syncer") if err != nil { glog.Errorf("Failed to store data via CacheManager for user: %s, source: %s, error: %v", userID, sourceID, err) // Fall back to direct cache access diff --git a/internal/v2/task/app_cancel.go b/internal/v2/task/app_cancel.go index dac58e2..7202bec 100644 --- a/internal/v2/task/app_cancel.go +++ b/internal/v2/task/app_cancel.go @@ -59,7 +59,7 @@ func (tm *TaskModule) AppCancel(task *Task) (string, error) { } // Send HTTP request and get response - glog.Infof("Sending HTTP request for app cancel: task=%s, app_name=%s", task.ID, appName) + glog.Infof("[APP] Sending HTTP request for app cancel: task=%s, app_name=%s", task.ID, appName) response, err := sendHttpRequest(http.MethodPost, urlStr, headers, nil) if err != nil { glog.Errorf("HTTP request failed for app cancel: task=%s, error=%v", task.ID, err) @@ -78,7 +78,7 @@ func (tm *TaskModule) AppCancel(task *Task) (string, error) { return string(errorJSON), err } - glog.Infof("HTTP request completed successfully for app cancel:
task=%s, response_length=%d", task.ID, len(response)) + glog.Infof("[APP] HTTP request completed successfully for app cancel: task=%s, response_length=%d", task.ID, len(response)) // Parse response to extract opID if cancel is successful var responseData map[string]interface{} diff --git a/internal/v2/task/app_clone.go b/internal/v2/task/app_clone.go index ccc4af2..a8c2634 100644 --- a/internal/v2/task/app_clone.go +++ b/internal/v2/task/app_clone.go @@ -212,7 +212,7 @@ func (tm *TaskModule) AppClone(task *Task) (string, error) { } // Send HTTP request and get response - glog.Infof("Sending HTTP request for app clone: task=%s", task.ID) + glog.Infof("[APP] Sending HTTP request for app clone: task=%s", task.ID) response, err := sendHttpRequest(http.MethodPost, urlStr, headers, strings.NewReader(string(ms))) if err != nil { glog.Errorf("HTTP request failed for app clone: task=%s, error=%v", task.ID, err) diff --git a/internal/v2/task/app_install.go b/internal/v2/task/app_install.go index df93a9e..caa51d3 100644 --- a/internal/v2/task/app_install.go +++ b/internal/v2/task/app_install.go @@ -196,7 +196,7 @@ func (tm *TaskModule) AppInstall(task *Task) (string, error) { } // Send HTTP request and get response - glog.V(2).Infof("Sending HTTP request for app installation: task=%s, data: %s", task.ID, string(ms)) + glog.Infof("[APP] Sending HTTP request for app installation: task=%s, data: %s", task.ID, string(ms)) response, err := sendHttpRequest(http.MethodPost, urlStr, headers, strings.NewReader(string(ms))) if err != nil { glog.Errorf("HTTP request failed for app installation: task=%s, error=%v", task.ID, err) @@ -216,7 +216,7 @@ func (tm *TaskModule) AppInstall(task *Task) (string, error) { return string(errorJSON), err } - glog.V(2).Infof("HTTP request completed successfully for app installation: task=%s, response_length=%d, resp=%s", task.ID, len(response), response) + glog.Infof("[APP] HTTP request completed successfully for app installation: task=%s, 
response_length=%d, resp=%s", task.ID, len(response), response) // Parse response to extract opID if installation is successful var responseData map[string]interface{} diff --git a/internal/v2/task/app_uninstall.go b/internal/v2/task/app_uninstall.go index a79a9af..c663e72 100644 --- a/internal/v2/task/app_uninstall.go +++ b/internal/v2/task/app_uninstall.go @@ -71,7 +71,7 @@ func (tm *TaskModule) AppUninstall(task *Task) (string, error) { } // Send HTTP request and get response - glog.Infof("Sending HTTP request for app uninstallation: task=%s, all=%v", task.ID, all) + glog.Infof("[APP] Sending HTTP request for app uninstallation: task=%s, all=%v", task.ID, all) // Create request body with all parameter requestBody := map[string]interface{}{ @@ -99,7 +99,7 @@ func (tm *TaskModule) AppUninstall(task *Task) (string, error) { return string(errorJSON), err } - glog.Infof("HTTP request completed successfully for app uninstallation: task=%s, response_length=%d", task.ID, len(response)) + glog.Infof("[APP] HTTP request completed successfully for app uninstallation: task=%s, response_length=%d", task.ID, len(response)) // Parse response to extract opID if uninstallation is successful var responseData map[string]interface{} diff --git a/internal/v2/task/app_upgrade.go b/internal/v2/task/app_upgrade.go index d31e7ff..9b8fff3 100644 --- a/internal/v2/task/app_upgrade.go +++ b/internal/v2/task/app_upgrade.go @@ -120,7 +120,7 @@ func (tm *TaskModule) AppUpgrade(task *Task) (string, error) { } // Send HTTP request and get response - glog.Infof("Sending HTTP request for app upgrade: task=%s, version=%s", task.ID, version) + glog.Infof("[APP] Sending HTTP request for app upgrade: task=%s, version=%s", task.ID, version) response, err := sendHttpRequest(http.MethodPost, urlStr, headers, strings.NewReader(string(ms))) if err != nil { glog.Errorf("HTTP request failed for app upgrade: task=%s, error=%v", task.ID, err) @@ -140,7 +140,7 @@ func (tm *TaskModule) AppUpgrade(task *Task) 
(string, error) { return string(errorJSON), err } - glog.Infof("HTTP request completed successfully for app upgrade: task=%s, response_length=%d", task.ID, len(response)) + glog.Infof("[APP] HTTP request completed successfully for app upgrade: task=%s, response_length=%d", task.ID, len(response)) // Create success result successResult := map[string]interface{}{ diff --git a/pkg/v2/api/task.go b/pkg/v2/api/task.go index bd86323..91db4b8 100644 --- a/pkg/v2/api/task.go +++ b/pkg/v2/api/task.go @@ -216,11 +216,11 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { } if targetApp.AppInfo == nil { - glog.V(2).Infof("installApp: targetApp.AppInfo is nil for app=%s source=%s", request.AppName, request.Source) + glog.V(2).Infof("installApp: targetApp.AppInfo is nil for app=%s, source=%s", request.AppName, request.Source) } else if targetApp.AppInfo.Price == nil { - glog.V(2).Infof("installApp: targetApp.AppInfo.Price is nil for app=%s source=%s", request.AppName, request.Source) + glog.V(2).Infof("installApp: targetApp.AppInfo.Price is nil for app=%s, source=%s", request.AppName, request.Source) } else { - glog.V(2).Infof("installApp: targetApp.AppInfo.Price detected for app=%s source=%s", request.AppName, request.Source) + glog.V(2).Infof("installApp: targetApp.AppInfo.Price detected for app=%s, source=%s", request.AppName, request.Source) } // Step 8: Verify chart package exists @@ -254,7 +254,7 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { } productID, developerName := extractInstallProductMetadata(targetApp.AppInfo) - glog.V(2).Infof("installApp: extracted product metadata app=%s source=%s productID=%s developer=%s", request.AppName, request.Source, productID, developerName) + glog.V(2).Infof("installApp: extracted product metadata app=%s, source=%s, productID=%s, developer=%s", request.AppName, request.Source, productID, developerName) realAppID := request.AppName if targetApp.AppInfo != nil && targetApp.AppInfo.AppEntry 
!= nil && targetApp.AppInfo.AppEntry.ID != "" { @@ -262,7 +262,7 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { } else if targetApp.RawData != nil && targetApp.RawData.AppID != "" { realAppID = targetApp.RawData.AppID } - glog.V(2).Infof("installApp: resolved realAppID=%s for app=%s source=%s", realAppID, request.AppName, request.Source) + glog.V(2).Infof("installApp: resolved realAppID=%s for app=%s, source=%s, sync=%v", realAppID, request.AppName, request.Source, request.Sync) // Step 10: Create installation task taskMetadata := map[string]interface{}{ @@ -278,15 +278,15 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { } if productID != "" { taskMetadata["productID"] = productID - glog.V(2).Infof("installApp: added productID=%s to metadata for app=%s source=%s", productID, request.AppName, request.Source) + glog.V(2).Infof("installApp: added productID=%s to metadata for app=%s, source=%s", productID, request.AppName, request.Source) } if developerName != "" { taskMetadata["developerName"] = developerName - glog.V(2).Infof("installApp: added developerName=%s to metadata for app=%s source=%s", developerName, request.AppName, request.Source) + glog.V(2).Infof("installApp: added developerName=%s to metadata for app=%s, source=%s", developerName, request.AppName, request.Source) } if realAppID != "" { taskMetadata["realAppID"] = realAppID - glog.V(2).Infof("installApp: added realAppID=%s to metadata for app=%s source=%s", realAppID, request.AppName, request.Source) + glog.V(2).Infof("installApp: added realAppID=%s to metadata for app=%s, source=%s", realAppID, request.AppName, request.Source) } // Handle synchronous requests with proper blocking @@ -311,7 +311,7 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { return } - glog.V(2).Infof("Created synchronous installation task: ID=%s for app: %s version: %s", task.ID, request.AppName, request.Version) + glog.V(2).Infof("Created synchronous 
installation task: ID=%s for app: %s, version: %s", task.ID, request.AppName, request.Version) // Wait for task completion <-done @@ -356,7 +356,7 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { return } - glog.V(2).Infof("Created asynchronous installation task: ID=%s for app: %s version: %s", task.ID, request.AppName, request.Version) + glog.V(2).Infof("Created asynchronous installation task: ID=%s for app: %s, version: %s", task.ID, request.AppName, request.Version) // Return immediately for asynchronous requests s.sendResponse(w, http.StatusOK, true, "App installation started successfully", map[string]interface{}{ From 2472d433f14d912f62bfbfaf54192ffeb73f6869 Mon Sep 17 00:00:00 2001 From: aby913 Date: Wed, 11 Mar 2026 14:21:24 +0800 Subject: [PATCH 32/45] refactor: compare NATS and local appStateLatest, fix time parse layout --- internal/v2/appinfo/cache.go | 35 ++++- internal/v2/appinfo/datawatcher_state.go | 188 +++++++++++++++-------- 2 files changed, 156 insertions(+), 67 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 7b67500..786ee19 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -20,6 +20,8 @@ import ( "k8s.io/client-go/tools/cache" ) +type CompareAppStateMsgFunc func(appState *AppStateLatestData) + // CacheManager manages the in-memory cache and Redis synchronization type CacheManager struct { cache *CacheData @@ -1507,8 +1509,9 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App return nil } -func (cm *CacheManager) SetAppData(userID, sourceID string, dataType AppDataType, data map[string]interface{}) error { +func (cm *CacheManager) SetAppData(userID, sourceID string, dataType AppDataType, data map[string]interface{}, tracing string) error { + glog.Infof("[SetAppData] user: %s, source: %s, dataType: %s, trace: %s", userID, sourceID, dataType, tracing) // go func() { if err := cm.setAppDataInternal(userID, sourceID, 
dataType, data); err != nil { glog.Errorf("Failed to set app data in goroutine: %v", err) @@ -2368,6 +2371,7 @@ func (cm *CacheManager) GetSettingsManager() *settings.SettingsManager { } // SyncMarketSourcesToCache synchronizes market sources to all users in cache +// todo remove watch dog func (cm *CacheManager) syncMarketSourcesToCacheInternal(sources []*settings.MarketSource) error { cm.mutex.Lock() _wd := cm.startLockWatchdog("@SyncMarketSourcesToCache") @@ -2530,11 +2534,15 @@ func (cm *CacheManager) ClearAppRenderFailedData() { defer cm.mutex.Unlock() count := 0 + failedAppNames := []string{} for _, t := range targets { if userData, ok := cm.cache.Users[t.userID]; ok { if sourceData, ok := userData.Sources[t.sourceID]; ok { if len(sourceData.AppRenderFailed) > 0 { count += len(sourceData.AppRenderFailed) + for _, f := range sourceData.AppRenderFailed { + failedAppNames = append(failedAppNames, fmt.Sprintf("%s_%s_%s", t.userID, t.sourceID, f.AppInfo.AppEntry.Name)) + } sourceData.AppRenderFailed = make([]*types.AppRenderFailedData, 0) } } @@ -2542,7 +2550,7 @@ func (cm *CacheManager) ClearAppRenderFailedData() { } if count > 0 { - glog.Infof("INFO: [Cleanup] Cleared %d AppRenderFailed entries in %v", count, time.Since(start)) + glog.Infof("INFO: [Cleanup] Cleared %d AppRenderFailed entries in %v, apps: %v", count, time.Since(start), failedAppNames) } } @@ -2653,3 +2661,26 @@ func (cm *CacheManager) GetCachedData() string { result, _ := json.Marshal(items) return string(result) } + +func (cm *CacheManager) CompareAppStateMsg(userID string, sourceID string, appName string, checker CompareAppStateMsgFunc) { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData := cm.cache.Users[userID] + if userData == nil { + return + } + + sourceData := userData.Sources[sourceID] + if sourceData == nil { + return + } + + for _, appState := range sourceData.AppStateLatest { + if appState.Status.Name != appName { + continue + } + checker(appState) + return + } +} diff --git 
a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index bdf09ed..febecc0 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -21,6 +21,8 @@ import ( "github.com/nats-io/nats.go" ) +var nanoTimeLayout = "2006-01-02T15:04:05.999999999Z" // 2006-01-02T15:04:05.000000000Z + // EntranceStatus represents the status of an entrance type EntranceStatus struct { ID string `json:"id"` // ID extracted from URL's first segment after splitting by "." @@ -59,6 +61,7 @@ type AppStateMessage struct { State string `json:"state"` User string `json:"user"` Progress string `json:"progress"` + MarketSource string `json:"marketSource"` EntranceStatuses []EntranceStatus `json:"entranceStatuses"` SharedEntrances []SharedEntrance `json:"sharedEntrances,omitempty"` } @@ -364,7 +367,7 @@ func (dw *DataWatcherState) startNatsConnection() error { } // handleMessage processes incoming NATS messages -func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { +func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // + glog.V(2).Infof("State - Received message from NATS subject %s: %s", msg.Subject, string(msg.Data)) var appStateMsg AppStateMessage @@ -492,7 +495,7 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // This avoids blocking NATS message processing glog.V(2).Infof("Delaying pending state message for app=%s, user=%s, opID=%s - failed to acquire lock", appStateMsg.Name, appStateMsg.User, appStateMsg.OpID) - dw.addDelayedMessage(msg, appStateMsg) + dw.addDelayedMessage(msg, appStateMsg) // install + pending return } if hasPendingTask { @@ -500,7 +503,7 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // This avoids matching to the wrong source when a new install starts glog.V(2).Infof("Delaying pending state message for app=%s, user=%s, opID=%s - found pending/running install task", appStateMsg.Name, appStateMsg.User, appStateMsg.OpID) - dw.addDelayedMessage(msg, 
appStateMsg) + dw.addDelayedMessage(msg, appStateMsg) // install + pending return } } @@ -528,7 +531,7 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // Task not found in database, delay to wait for task to be persisted glog.Errorf("Delaying pending state message for app=%s, user=%s, opID=%s - task not found in DB, waiting for persistence", appStateMsg.Name, appStateMsg.User, appStateMsg.OpID) - dw.addDelayedMessage(msg, appStateMsg) + dw.addDelayedMessage(msg, appStateMsg) // install + pending return } // Task found in database, can proceed (storeStateToCache will use OpID to query from DB) @@ -538,73 +541,127 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { } } - userData := dw.cacheManager.getUserData(appStateMsg.User) - if userData == nil { - glog.V(2).Infof("User data not found for user %s", appStateMsg.User) - return - } - - for _, sourceData := range userData.Sources { - for _, appState := range sourceData.AppStateLatest { - if appState.Status.Name == appStateMsg.Name { // && appState.Status.State == appStateMsg.State - - /** - * [Mandatory Sync Whitelist] - * The cases below define critical state transition scenarios that must be processed. - * - * Background: - * When a user performs an action in the UI (e.g., canceling installation/download) or when an app lifecycle event completes (e.g., installation finished, uninstallation finished), - * the final state pushed by NATS (appStateMsg.State) may differ from the cached state in memory (appState.Status.State). - * - * Purpose: - * Even if the progress (Progress) has not changed and there is no entrance information (EntranceStatuses), - * as long as the following conditions are met, we must bypass the "deduplication check" in the default branch. - * This forces the local state to update and pushes the change to the frontend, ensuring the UI promptly reflects the final result. 
- */ - switch { - // NATS State APP State - case appStateMsg.State == "running" && appState.Status.State == "installing": - case appStateMsg.State == "running" && appState.Status.State == "initializing": - case appStateMsg.State == "uninstalled" && appState.Status.State == "running": - case appStateMsg.State == "uninstalled" && appState.Status.State == "stopped": - case appStateMsg.State == "uninstalled" && appState.Status.State == "uninstalling": - case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCanceling": - case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCancelFailed": - case appStateMsg.State == "pendingCanceled" && appState.Status.State == "pending": - case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "downloadingCanceling": - case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "pending": - case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": - case appStateMsg.State == "installingCanceled" && appState.Status.State == "installingCanceling": - default: - if len(appStateMsg.EntranceStatuses) == 0 && appState.Status.Progress == appStateMsg.Progress { - glog.V(2).Infof("App state message is the same as the cached app state message for app %s, user %s, source %s, appState: %s, msgState: %s", - appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, appState.Status.State, appStateMsg.State) - return - } - } + shouldUpdate := true + checker := func(appState *AppStateLatestData) { + switch { + // NATS State APP State + case appStateMsg.State == "running" && appState.Status.State == "installing": + case appStateMsg.State == "running" && appState.Status.State == "initializing": + case appStateMsg.State == "uninstalled" && appState.Status.State == "running": + case appStateMsg.State == "uninstalled" && appState.Status.State == "stopped": + case appStateMsg.State == "uninstalled" && appState.Status.State == "uninstalling": + 
case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCanceling": + case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCancelFailed": + case appStateMsg.State == "pendingCanceled" && appState.Status.State == "pending": + case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "downloadingCanceling": + case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "pending": + case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": + case appStateMsg.State == "installingCanceled" && appState.Status.State == "installingCanceling": + default: + // state = downloading + if len(appStateMsg.EntranceStatuses) == 0 && appState.Status.Progress == appStateMsg.Progress { + glog.V(2).Infof("App state message is the same as the cached app state message for app %s, user %s, source %s, appState: %s, msgState: %s", + appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, appState.Status.State, appStateMsg.State) + shouldUpdate = false + return + } + } - // Compare timestamps properly by parsing them - if appState.Status.StatusTime != "" && appStateMsg.CreateTime != "" { - statusTime, err1 := time.Parse("2006-01-02T15:04:05.000000000Z", appState.Status.StatusTime) - createTime, err2 := time.Parse("2006-01-02T15:04:05.000000000Z", appStateMsg.CreateTime) + // Compare timestamps properly by parsing them + if appState.Status.StatusTime != "" && appStateMsg.CreateTime != "" { + statusTime, err1 := time.Parse(nanoTimeLayout, appState.Status.StatusTime) + createTime, err2 := time.Parse(nanoTimeLayout, appStateMsg.CreateTime) - if err1 == nil && err2 == nil { - if statusTime.After(createTime) { - glog.V(2).Infof("Cached app state is newer than incoming message for app %s, user %s, source %s, appTime: %s, msgTime: %s. 
Skipping update.", - appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, statusTime.String(), createTime.String()) - return - } - } else { - glog.Errorf("Failed to parse timestamps for comparison: StatusTime=%s, CreateTime=%s, err1=%v, err2=%v", - appState.Status.StatusTime, appStateMsg.CreateTime, err1, err2) - } + if err1 == nil && err2 == nil { + if statusTime.After(createTime) { + glog.V(2).Infof("Cached app state is newer than incoming message for app %s, user %s, source %s, appTime: %s, msgTime: %s. Skipping update.", + appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, statusTime.String(), createTime.String()) + shouldUpdate = false + return } + } else { + glog.Errorf("Failed to parse timestamps for comparison: StatusTime=%s, CreateTime=%s, err1=%v, err2=%v", + appState.Status.StatusTime, appStateMsg.CreateTime, err1, err2) } } } + dw.cacheManager.CompareAppStateMsg(appStateMsg.User, appStateMsg.MarketSource, appStateMsg.Name, checker) + + if !shouldUpdate { + return + } + + // userData := dw.cacheManager.getUserData(appStateMsg.User) + // if userData == nil { + // glog.V(2).Infof("User data not found for user %s", appStateMsg.User) + // return + // } + + // for sourceId, sourceData := range userData.Sources { + // if appStateMsg.MarketSource != "" && sourceId != appStateMsg.MarketSource { + // continue + // } + // for _, appState := range sourceData.AppStateLatest { + // if appState.Status.Name == appStateMsg.Name { // && appState.Status.State == appStateMsg.State + + // /** + // * [Mandatory Sync Whitelist] + // * The cases below define critical state transition scenarios that must be processed. + // * + // * Background: + // * When a user performs an action in the UI (e.g., canceling installation/download) or when an app lifecycle event completes (e.g., installation finished, uninstallation finished), + // * the final state pushed by NATS (appStateMsg.State) may differ from the cached state in memory (appState.Status.State). 
+ // * + // * Purpose: + // * Even if the progress (Progress) has not changed and there is no entrance information (EntranceStatuses), + // * as long as the following conditions are met, we must bypass the "deduplication check" in the default branch. + // * This forces the local state to update and pushes the change to the frontend, ensuring the UI promptly reflects the final result. + // */ + // switch { + // // NATS State APP State + // case appStateMsg.State == "running" && appState.Status.State == "installing": + // case appStateMsg.State == "running" && appState.Status.State == "initializing": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "running": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "stopped": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "uninstalling": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCanceling": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCancelFailed": + // case appStateMsg.State == "pendingCanceled" && appState.Status.State == "pending": + // case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "downloadingCanceling": + // case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "pending": + // case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": + // case appStateMsg.State == "installingCanceled" && appState.Status.State == "installingCanceling": + // default: + // if len(appStateMsg.EntranceStatuses) == 0 && appState.Status.Progress == appStateMsg.Progress { + // glog.V(2).Infof("App state message is the same as the cached app state message for app %s, user %s, source %s, appState: %s, msgState: %s", + // appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, appState.Status.State, appStateMsg.State) + // return + // } + // } + + // // Compare timestamps properly by parsing them + // if 
appState.Status.StatusTime != "" && appStateMsg.CreateTime != "" { + // statusTime, err1 := time.Parse("2006-01-02T15:04:05.000000000Z", appState.Status.StatusTime) + // createTime, err2 := time.Parse("2006-01-02T15:04:05.000000000Z", appStateMsg.CreateTime) + + // if err1 == nil && err2 == nil { + // if statusTime.After(createTime) { + // glog.V(2).Infof("Cached app state is newer than incoming message for app %s, user %s, source %s, appTime: %s, msgTime: %s. Skipping update.", + // appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, statusTime.String(), createTime.String()) + // return + // } + // } else { + // glog.Errorf("Failed to parse timestamps for comparison: StatusTime=%s, CreateTime=%s, err1=%v, err2=%v", + // appState.Status.StatusTime, appStateMsg.CreateTime, err1, err2) + // } + // } + // } + // } + // } + // Process the message - glog.V(2).Infof("State - Processs message from NATS subject %s, for internal for opID: %s, app: %s, user: %s, msgState: %s", + glog.V(2).Infof("State - Processs update message from NATS subject %s, for internal for opID: %s, app: %s, user: %s, msgState: %s", msg.Subject, appStateMsg.OpID, appStateMsg.Name, appStateMsg.User, appStateMsg.State) dw.processMessageInternal(msg, appStateMsg) } @@ -837,7 +894,8 @@ func (dw *DataWatcherState) storeStateToCache(msg AppStateMessage) { // Parse statusTime for sorting var statusTime time.Time if appState.Status.StatusTime != "" { - if parsedTime, err := time.Parse("2006-01-02T15:04:05.000000000Z", appState.Status.StatusTime); err == nil { + + if parsedTime, err := time.Parse(nanoTimeLayout, appState.Status.StatusTime); err == nil { statusTime = parsedTime } else { // If parsing fails, use zero time (will be sorted to the end) @@ -983,7 +1041,7 @@ func (dw *DataWatcherState) storeStateToCache(msg AppStateMessage) { return } - if err := dw.cacheManager.SetAppData(userID, sourceID, AppStateLatest, stateData); err != nil { // + App - Sending + if err := dw.cacheManager.SetAppData(userID, 
sourceID, AppStateLatest, stateData, "DataWatcherState"); err != nil { // + App - Sending glog.Errorf("Failed to store app state to cache: %v", err) } else { glog.V(2).Infof("Successfully stored app state to cache for user=%s, source=%s, app=%s, state=%s", From 39fb00ea53beb2c0237c377c954bee55a4aefd18 Mon Sep 17 00:00:00 2001 From: aby913 Date: Wed, 11 Mar 2026 19:36:33 +0800 Subject: [PATCH 33/45] refactor: update user cache on add/delete operations --- internal/v2/appinfo/cache.go | 41 +++++++++++++++++-- internal/v2/appinfo/datasender_app.go | 6 +-- internal/v2/appinfo/db.go | 6 ++- .../v2/appinfo/hydrationfn/task_for_api.go | 4 +- internal/v2/appinfo/pipeline.go | 3 ++ internal/v2/appinfo/syncer.go | 9 ++++ internal/v2/utils/state_monitor.go | 4 +- 7 files changed, 61 insertions(+), 12 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 786ee19..5d7eed2 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -982,7 +982,7 @@ func (cm *CacheManager) updateAppStateLatest(userID, sourceID string, sourceData Source: sourceID, } - if err := cm.dataSender.SendAppInfoUpdate(update); err != nil { + if err := cm.dataSender.SendAppInfoUpdate(update, "cache"); err != nil { glog.Errorf("Force push state update for app %s failed: %v", newAppState.Status.Name, err) } else { glog.V(3).Infof("Force pushed state update for app %s due to EntranceStatuses fallback (only metadata changed)", newAppState.Status.Name) @@ -2561,16 +2561,16 @@ func (cm *CacheManager) HandlerEvent() cache.ResourceEventHandler { }, Handler: cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { - cm.ListUsers() + cm.ListUsers("Add") }, DeleteFunc: func(obj interface{}) { - cm.ListUsers() + cm.ListUsers("Delete") }, }, } } -func (cm *CacheManager) ListUsers() { +func (cm *CacheManager) ListUsers(opType string) { dynamicClient := client.Factory.Client() unstructuredUsers, err := 
dynamicClient.Resource(client.UserGVR).List(context.Background(), v1.ListOptions{}) if err != nil { @@ -2578,6 +2578,7 @@ func (cm *CacheManager) ListUsers() { return } + glog.Infof("[Cache] User watch handler, type: %s", opType) var userList = make([]*client.User, 0) for _, unstructuredUser := range unstructuredUsers.Items { @@ -2628,6 +2629,30 @@ func (cm *CacheManager) ListUsers() { } } +func (cm *CacheManager) RemoveDeletedUser() { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + var users []string + for _, user := range cm.cache.Users { + if user.UserInfo == nil { + continue + } + if !user.UserInfo.Exists { + users = append(users, user.UserInfo.Name) + } + } + + if len(users) == 0 { + return + } + + glog.Infof("[Cache] Remove deleted users: %v", users) + for _, u := range users { + delete(cm.cache.Users, u) + } +} + func (cm *CacheManager) GetCachedData() string { cm.mutex.RLock() defer cm.mutex.RUnlock() @@ -2641,6 +2666,14 @@ func (cm *CacheManager) GetCachedData() string { var apps = make(map[string]interface{}) apps["latest"] = len(sv.AppInfoLatest) apps["pending"] = len(sv.AppInfoLatestPending) + var pendings []string + if len(sv.AppInfoLatestPending) < 10 { + for _, pending := range sv.AppInfoLatestPending { + pendings = append(pendings, fmt.Sprintf("%s_%s_%s", sn, pending.AppInfo.AppEntry.Name, pending.AppInfo.AppEntry.Version)) + } + } + apps["pending_apps"] = pendings + apps["failed"] = len(sv.AppRenderFailed) apps["history"] = len(sv.AppInfoHistory) apps["state"] = len(sv.AppStateLatest) diff --git a/internal/v2/appinfo/datasender_app.go b/internal/v2/appinfo/datasender_app.go index b17761e..5c8a648 100644 --- a/internal/v2/appinfo/datasender_app.go +++ b/internal/v2/appinfo/datasender_app.go @@ -96,7 +96,7 @@ func loadConfig() Config { } // SendAppInfoUpdate sends app info update to NATS -func (ds *DataSender) SendAppInfoUpdate(update types.AppInfoUpdate) error { +func (ds *DataSender) SendAppInfoUpdate(update types.AppInfoUpdate, trace string) error 
{ if !ds.enabled { glog.V(3).Info("NATS data sender is disabled, skipping message send") return nil @@ -116,9 +116,9 @@ func (ds *DataSender) SendAppInfoUpdate(update types.AppInfoUpdate) error { // Log before sending if len(string(data)) > 800 { - glog.V(2).Infof("App - Sending app info update to NATS subject '%s': %s", subject, string(data)[:800]) + glog.V(2).Infof("App - Sending app info update to NATS subject '%s'(trace: %s): %s", subject, trace, string(data)[:800]) } else { - glog.V(2).Infof("App - Sending app info update to NATS subject '%s': %s", subject, string(data)) + glog.V(2).Infof("App - Sending app info update to NATS subject '%s'(trace: %s): %s", subject, trace, string(data)) } // Send message to NATS diff --git a/internal/v2/appinfo/db.go b/internal/v2/appinfo/db.go index 63a0107..453e4e9 100644 --- a/internal/v2/appinfo/db.go +++ b/internal/v2/appinfo/db.go @@ -159,6 +159,10 @@ func (r *RedisClient) LoadCacheFromRedis() (*CacheData, error) { func (r *RedisClient) loadUserData(userID string) (*UserData, error) { userData := NewUserDataEx(userID) // NewUserData() + if userData.UserInfo == nil { + return nil, fmt.Errorf("User %s not exists in cluster", userID) + } + // Load user hash from Redis userHashKey := fmt.Sprintf("appinfo:user:%s:hash", userID) hashValue, err := r.client.Get(r.ctx, userHashKey).Result() @@ -166,7 +170,7 @@ func (r *RedisClient) loadUserData(userID string) (*UserData, error) { userData.Hash = hashValue glog.Infof("Loaded user hash from Redis: user=%s, hash=%s", userID, hashValue) } else if err != redis.Nil { - glog.Warningf("Failed to load user hash from Redis: user=%s, error=%v", userID, err) + glog.Errorf("Failed to load user hash from Redis: user=%s, error=%v", userID, err) } // Get all source keys for this user diff --git a/internal/v2/appinfo/hydrationfn/task_for_api.go b/internal/v2/appinfo/hydrationfn/task_for_api.go index 965363d..72a6cd0 100644 --- a/internal/v2/appinfo/hydrationfn/task_for_api.go +++ 
b/internal/v2/appinfo/hydrationfn/task_for_api.go @@ -108,14 +108,14 @@ func (s *TaskForApiStep) Execute(ctx context.Context, task *HydrationTask) error if err := s.writeAppDataToCache(task, appData); err != nil { glog.Errorf("Warning: failed to write app_data to cache: %v", err) } else { - glog.V(3).Infof("Successfully wrote app_data to cache for user=%s, source=%s, app=%s, appName=%s", + glog.V(2).Infof("Successfully wrote app_data to cache for user=%s, source=%s, app=%s, appName=%s", task.UserID, task.SourceID, task.AppID, task.AppName) } } } } - glog.V(3).Info("SyncApp to chart repo completed successfully") + glog.V(2).Infof("[TaskForApi] SyncApp %s(%s %s) to chart repo completed successfully", task.AppID, task.AppName, task.AppVersion) return nil } diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 3b8e50a..30836d8 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -117,6 +117,9 @@ func (p *Pipeline) run(ctx context.Context) { startTime := time.Now() + // add check current all users in cluster + p.cacheManager.RemoveDeletedUser() + // Phase 1-4: only modify data, no hash calculation or ForceSync p.phaseSyncer(ctx) hydrateUsers := p.phaseHydrateApps(ctx) diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index a0e0686..8a9719f 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -51,6 +51,7 @@ type Syncer struct { lastSyncedAppCount atomic.Int64 lastSyncDetails atomic.Value // *SyncDetails statusMutex sync.RWMutex // Mutex for complex status updates + tryOnce atomic.Bool } // NewSyncer creates a new syncer with the given steps @@ -63,6 +64,7 @@ func NewSyncer(cache *CacheData, syncInterval time.Duration, settingsManager *se stopChan: make(chan struct{}), isRunning: atomic.Bool{}, // Initialize with false settingsManager: settingsManager, + tryOnce: atomic.Bool{}, } // Initialize atomic values s.lastSyncTime.Store(time.Time{}) @@ -140,6 
+142,11 @@ func (s *Syncer) SyncOnce(ctx context.Context) { return } + flag := s.tryOnce.Load() + if flag { + // return + } + configChanged, reason := s.hasSyncRelevantConfigChanged() throttled := !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval @@ -159,6 +166,8 @@ func (s *Syncer) SyncOnce(ctx context.Context) { if err := s.executeSyncCycle(ctx); err != nil { glog.Errorf("SyncOnce: sync cycle failed: %v", err) } + + s.tryOnce.Store(true) } // hasAnyRemoteHashChanged does a lightweight HTTP probe to each remote source's diff --git a/internal/v2/utils/state_monitor.go b/internal/v2/utils/state_monitor.go index 72016cc..2bd2c82 100644 --- a/internal/v2/utils/state_monitor.go +++ b/internal/v2/utils/state_monitor.go @@ -10,7 +10,7 @@ import ( // DataSenderInterface defines the interface for sending app info updates type DataSenderInterface interface { - SendAppInfoUpdate(update types.AppInfoUpdate) error + SendAppInfoUpdate(update types.AppInfoUpdate, trace string) error IsConnected() bool Close() } @@ -66,7 +66,7 @@ func (sm *StateMonitor) NotifyStateChange( Source: sourceID, } - return sm.dataSender.SendAppInfoUpdate(update) + return sm.dataSender.SendAppInfoUpdate(update, "state_monitor") } // HasStateChanged checks if the app state has changed compared to existing state From fbde3780e240bfebda0639eeaf0485028e31c370 Mon Sep 17 00:00:00 2001 From: aby913 Date: Wed, 11 Mar 2026 19:37:19 +0800 Subject: [PATCH 34/45] fix: add parsing for rawAppName field --- internal/v2/appinfo/status_correction_check.go | 15 ++++++++------- internal/v2/utils/setup.go | 16 +++++++++------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index 077213a..315a7ca 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -406,13 +406,14 @@ func (scc *StatusCorrectionChecker) 
fetchLatestMiddlewaresStatus() ([]utils.AppS Namespace: middleware.Namespace, }, Spec: struct { - Name string `json:"name"` - AppID string `json:"appid"` - Owner string `json:"owner"` - Icon string `json:"icon"` - Title string `json:"title"` - Source string `json:"source"` - Entrances []struct { + Name string `json:"name"` + RawAppName string `json:"rawAppName"` + AppID string `json:"appid"` + Owner string `json:"owner"` + Icon string `json:"icon"` + Title string `json:"title"` + Source string `json:"source"` + Entrances []struct { Name string `json:"name"` Url string `json:"url"` Invisible bool `json:"invisible"` diff --git a/internal/v2/utils/setup.go b/internal/v2/utils/setup.go index 99653eb..cf2e43e 100644 --- a/internal/v2/utils/setup.go +++ b/internal/v2/utils/setup.go @@ -23,13 +23,14 @@ type AppServiceResponse struct { Namespace string `json:"namespace"` } `json:"metadata"` Spec struct { - Name string `json:"name"` - AppID string `json:"appid"` - Owner string `json:"owner"` - Icon string `json:"icon"` - Title string `json:"title"` - Source string `json:"source"` - Entrances []struct { + Name string `json:"name"` + RawAppName string `json:"rawAppName"` + AppID string `json:"appid"` + Owner string `json:"owner"` + Icon string `json:"icon"` + Title string `json:"title"` + Source string `json:"source"` + Entrances []struct { Name string `json:"name"` Url string `json:"url"` Invisible bool `json:"invisible"` @@ -595,6 +596,7 @@ func FetchAppEntranceUrls(appName string, user string) (map[string]string, error func createAppStateLatestData(app AppServiceResponse, isStartupProcess bool) (*types.AppStateLatestData, string) { data := map[string]interface{}{ "name": app.Spec.Name, + "rawAppName": app.Spec.RawAppName, "title": app.Spec.Title, "state": app.Status.State, "updateTime": app.Status.UpdateTime, From b7ee8e2214b5a001eaaa9b71b1faa63cdb9527bc Mon Sep 17 00:00:00 2001 From: aby913 Date: Wed, 11 Mar 2026 20:35:43 +0800 Subject: [PATCH 35/45] refactor: add state 
api for settings --- pkg/v2/api/app.go | 126 +++++++++++++++++++++++++++++++++++++++++-- pkg/v2/api/server.go | 3 ++ 2 files changed, 125 insertions(+), 4 deletions(-) diff --git a/pkg/v2/api/app.go b/pkg/v2/api/app.go index c8d8447..6187c4b 100644 --- a/pkg/v2/api/app.go +++ b/pkg/v2/api/app.go @@ -97,6 +97,7 @@ type FilteredSourceData struct { // FilteredSourceDataForState represents filtered source data for state endpoint (only AppStateLatest) type FilteredSourceDataForState struct { Type types.SourceDataType `json:"type"` + AppInfoLatest []*types.AppInfoLatestData `json:"app_info_latest,omitempty"` AppStateLatest []*types.AppStateLatestData `json:"app_state_latest"` } @@ -754,7 +755,113 @@ func (s *Server) getMarketState(w http.ResponseWriter, r *http.Request) { // Filter the user data to include only AppStateLatest fields with timeout filterStart := time.Now() - filteredUserData := s.filterUserDataForStateWithTimeout(ctx, userData) + filteredUserData := s.filterUserDataForStateWithTimeout(ctx, userData, false) + if filteredUserData == nil { + glog.V(3).Infof("Data filtering timed out or failed for user: %s", userID) + resultChan <- result{err: fmt.Errorf("data filtering timeout")} + return + } + glog.V(3).Infof("Data filtering took %v for user: %s", time.Since(filterStart), userID) + + // Prepare response data + responseData := MarketStateResponse{ + UserData: filteredUserData, + UserID: userID, + Timestamp: time.Now().Unix(), + } + + resultChan <- result{data: responseData} + }() + + // Wait for result or timeout + select { + case <-ctx.Done(): + glog.V(3).Infof("Request timeout or cancelled for /api/v2/market/state") + s.sendResponse(w, http.StatusRequestTimeout, false, "Request timeout - data retrieval took too long", nil) + return + case res := <-resultChan: + if res.err != nil { + glog.Errorf("Error retrieving market state: %v", res.err) + if res.err.Error() == "user data not found" { + s.sendResponse(w, http.StatusNotFound, false, "User data not found", 
nil) + } else { + s.sendResponse(w, http.StatusInternalServerError, false, "Failed to retrieve market state", nil) + } + return + } + + glog.V(2).Infof("Market state retrieved successfully for user: %s", userID) + s.sendResponse(w, http.StatusOK, true, "Market state retrieved successfully", res.data) + } +} + +// Get market state information (only AppStateLatest data) +func (s *Server) getMarketStateSimple(w http.ResponseWriter, r *http.Request) { + requestStart := time.Now() + glog.V(2).Infof("GET /api/v2/market/statesimple - Getting market state simple, request start: %v", requestStart) + + // Add timeout context + ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) + defer cancel() + + // Check if cache manager is available + if s.cacheManager == nil { + glog.V(3).Info("Cache manager is not initialized") + s.sendResponse(w, http.StatusInternalServerError, false, "Cache manager not available", nil) + return + } + + // Convert http.Request to restful.Request to reuse utils functions + restfulReq := s.httpToRestfulRequest(r) + + // Get user information from request using utils module + authStart := time.Now() + userID, err := utils.GetUserInfoFromRequest(restfulReq) + if err != nil { + glog.Errorf("Failed to get user from request: %v", err) + s.sendResponse(w, http.StatusUnauthorized, false, "Failed to get user information", nil) + return + } + glog.V(3).Infof("User authentication took %v, retrieved user ID: %s", time.Since(authStart), userID) + + // Create a channel to receive the result + type result struct { + data MarketStateResponse + err error + } + resultChan := make(chan result, 1) + + // Run the data retrieval in a goroutine + go func() { + defer func() { + if r := recover(); r != nil { + glog.Errorf("Panic in getMarketState: %v", r) + resultChan <- result{err: fmt.Errorf("internal error occurred")} + } + }() + + // Get user data from cache with timeout check + start := time.Now() + userData := s.cacheManager.GetUserData(userID) + if userData 
== nil { + glog.V(3).Infof("User data not found for user: %s", userID) + resultChan <- result{err: fmt.Errorf("user data not found")} + return + } + glog.V(3).Infof("GetUserData took %v for user: %s", time.Since(start), userID) + + // Check if we're still within timeout before filtering + select { + case <-ctx.Done(): + glog.V(3).Infof("Context cancelled during user data retrieval for user: %s", userID) + resultChan <- result{err: fmt.Errorf("request cancelled")} + return + default: + } + + // Filter the user data to include only AppStateLatest fields with timeout + filterStart := time.Now() + filteredUserData := s.filterUserDataForStateWithTimeout(ctx, userData, true) if filteredUserData == nil { glog.V(3).Infof("Data filtering timed out or failed for user: %s", userID) resultChan <- result{err: fmt.Errorf("data filtering timeout")} @@ -1041,7 +1148,7 @@ func (s *Server) convertSourceDataToFiltered(sourceData *types.SourceData) *Filt } // filterUserDataForStateWithTimeout filters user data to include only AppStateLatest fields with timeout -func (s *Server) filterUserDataForStateWithTimeout(ctx context.Context, userData *types.UserData) *FilteredUserDataForState { +func (s *Server) filterUserDataForStateWithTimeout(ctx context.Context, userData *types.UserData, withAppInfoLatest bool) *FilteredUserDataForState { if userData == nil { return nil } @@ -1073,7 +1180,7 @@ func (s *Server) filterUserDataForStateWithTimeout(ctx context.Context, userData } // Convert data directly without additional locks - filteredSourceData := s.convertSourceDataToFilteredForState(sourceData) + filteredSourceData := s.convertSourceDataToFilteredForState(sourceData, withAppInfoLatest) if filteredSourceData != nil { filteredUserData.Sources[sourceID] = filteredSourceData } @@ -1084,7 +1191,7 @@ func (s *Server) filterUserDataForStateWithTimeout(ctx context.Context, userData } // convertSourceDataToFilteredForState converts source data to filtered format for state endpoint (only 
AppStateLatest) -func (s *Server) convertSourceDataToFilteredForState(sourceData *types.SourceData) *FilteredSourceDataForState { +func (s *Server) convertSourceDataToFilteredForState(sourceData *types.SourceData, withAppInfoLatest bool) *FilteredSourceDataForState { if sourceData == nil { return nil } @@ -1094,6 +1201,17 @@ func (s *Server) convertSourceDataToFilteredForState(sourceData *types.SourceDat AppStateLatest: sourceData.AppStateLatest, } + if withAppInfoLatest { + var appInfoLatest []*types.AppInfoLatestData + for _, app := range sourceData.AppInfoLatest { + var info = &types.AppInfoLatestData{ + AppSimpleInfo: app.AppSimpleInfo, + } + appInfoLatest = append(appInfoLatest, info) + } + filteredSourceData.AppInfoLatest = appInfoLatest + } + return filteredSourceData } diff --git a/pkg/v2/api/server.go b/pkg/v2/api/server.go index 6a60b2e..f00f4ff 100644 --- a/pkg/v2/api/server.go +++ b/pkg/v2/api/server.go @@ -86,6 +86,9 @@ func (s *Server) setupRoutes() { api.HandleFunc("/market/state", s.getMarketState).Methods("GET") glog.V(3).Info("Route configured: GET /app-store/api/v2/market/state") + api.HandleFunc("/market/statesimple", s.getMarketStateSimple).Methods("GET") + glog.V(3).Info("Route configured: GET /app-store/api/v2/market/statesimple") + // 2. 
Get specific application information (supports multiple queries) api.HandleFunc("/apps", s.getAppsInfo).Methods("POST") glog.V(3).Info("Route configured: POST /app-store/api/v2/apps") From 449e1f6083d041021cd2740ca3c110ede8776ec0 Mon Sep 17 00:00:00 2001 From: aby913 Date: Wed, 11 Mar 2026 20:36:52 +0800 Subject: [PATCH 36/45] fix: add parsing for Title field --- internal/v2/appinfo/status_correction_check.go | 10 ++++++++++ internal/v2/utils/setup.go | 12 +++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index 315a7ca..ce8f3c1 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -418,6 +418,16 @@ func (scc *StatusCorrectionChecker) fetchLatestMiddlewaresStatus() ([]utils.AppS Url string `json:"url"` Invisible bool `json:"invisible"` } `json:"entrances"` + Settings struct { + ClusterScoped string `json:"clusterScoped"` + MobileSupported string `json:"mobileSupported"` + Policy string `json:"policy"` + RequiredGPU string `json:"requiredGPU"` + Source string `json:"source"` + Target string `json:"target"` + Title string `json:"title"` + Version string `json:"version"` + } `json:"settings"` }{ Name: middleware.Metadata.Name, AppID: middleware.Metadata.Name, diff --git a/internal/v2/utils/setup.go b/internal/v2/utils/setup.go index cf2e43e..c68a734 100644 --- a/internal/v2/utils/setup.go +++ b/internal/v2/utils/setup.go @@ -35,6 +35,16 @@ type AppServiceResponse struct { Url string `json:"url"` Invisible bool `json:"invisible"` } `json:"entrances"` + Settings struct { + ClusterScoped string `json:"clusterScoped"` + MobileSupported string `json:"mobileSupported"` + Policy string `json:"policy"` + RequiredGPU string `json:"requiredGPU"` + Source string `json:"source"` + Target string `json:"target"` + Title string `json:"title"` + Version string `json:"version"` + } `json:"settings"` } 
`json:"spec"` Status struct { State string `json:"state"` @@ -597,7 +607,7 @@ func createAppStateLatestData(app AppServiceResponse, isStartupProcess bool) (*t data := map[string]interface{}{ "name": app.Spec.Name, "rawAppName": app.Spec.RawAppName, - "title": app.Spec.Title, + "title": app.Spec.Settings.Title, "state": app.Status.State, "updateTime": app.Status.UpdateTime, "statusTime": app.Status.StatusTime, From 3060c25fe208d5df9523b95f92b02c6d70a30a76 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Mar 2026 09:37:50 +0000 Subject: [PATCH 37/45] Fix pending/failed overlap and deduplicate failed entries Co-authored-by: aby913 --- internal/v2/appinfo/cache.go | 105 +++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 11 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 5d7eed2..0c7ca3b 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -233,6 +233,23 @@ func (cm *CacheManager) UpsertLatestAndRemovePending( } sourceData.AppInfoLatestPending = newPending + // Remove the same app from render-failed list after successful move to latest. + // Keep Pending/Failed disjoint and avoid stale failed entries. 
+ newFailed := make([]*types.AppRenderFailedData, 0, len(sourceData.AppRenderFailed)) + for _, f := range sourceData.AppRenderFailed { + if f == nil || f.RawData == nil { + newFailed = append(newFailed, f) + continue + } + matchedByID := appID != "" && (f.RawData.ID == appID || f.RawData.AppID == appID) + matchedByName := appName != "" && f.RawData.Name == appName + if matchedByID || matchedByName { + continue + } + newFailed = append(newFailed, f) + } + sourceData.AppRenderFailed = newFailed + return oldVersion, replaced, true } @@ -1369,6 +1386,27 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App latestVersionMap[latestApp.RawData.ID] = v } } + // Build version map from AppRenderFailed to avoid re-adding the same failed app + // into Pending on every sync cycle. + failedVersionMap := make(map[string]string) + for _, failedApp := range sourceData.AppRenderFailed { + if failedApp == nil || failedApp.RawData == nil { + continue + } + v := failedApp.Version + if v == "" { + v = failedApp.RawData.Version + } + if failedApp.RawData.Name != "" { + failedVersionMap[failedApp.RawData.Name] = v + } + if failedApp.RawData.AppID != "" { + failedVersionMap[failedApp.RawData.AppID] = v + } + if failedApp.RawData.ID != "" { + failedVersionMap[failedApp.RawData.ID] = v + } + } originalCount := len(sourceData.AppInfoLatestPending) sourceData.AppInfoLatestPending = sourceData.AppInfoLatestPending[:0] @@ -1379,18 +1417,41 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App return false } incomingVersion := appData.RawData.Version - if incomingVersion == "" { - return false - } - if name := appData.RawData.Name; name != "" { - if existing, ok := latestVersionMap[name]; ok && existing == incomingVersion { - return true + if incomingVersion != "" { + if name := appData.RawData.Name; name != "" { + if existing, ok := latestVersionMap[name]; ok && existing == incomingVersion { + return true + } + } + if id := 
appData.RawData.AppID; id != "" { + if existing, ok := latestVersionMap[id]; ok && existing == incomingVersion { + return true + } + } + if id := appData.RawData.ID; id != "" { + if existing, ok := latestVersionMap[id]; ok && existing == incomingVersion { + return true + } } } - if id := appData.RawData.AppID; id != "" { - if existing, ok := latestVersionMap[id]; ok && existing == incomingVersion { + + // Skip app when the same app-version is already in render-failed. + // If either side has empty version, still skip to prevent Pending/Failed overlap. + matchFailed := func(key string) bool { + if key == "" { + return false + } + failedVersion, ok := failedVersionMap[key] + if !ok { + return false + } + if failedVersion == "" || incomingVersion == "" { return true } + return failedVersion == incomingVersion + } + if matchFailed(appData.RawData.Name) || matchFailed(appData.RawData.AppID) || matchFailed(appData.RawData.ID) { + return true } return false } @@ -1483,14 +1544,36 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App } } - glog.V(2).Infof("Updated AppInfoLatestPending: %d new, %d skipped (unchanged version) for user=%s, source=%s", + glog.V(2).Infof("Updated AppInfoLatestPending: %d new, %d skipped (unchanged version or in render-failed) for user=%s, source=%s", len(sourceData.AppInfoLatestPending), skippedCount, userID, sourceID) case types.AppRenderFailed: // Handle render failed data - this is typically set by the hydrator when tasks fail if failedAppData, hasFailedApp := data["failed_app"].(*types.AppRenderFailedData); hasFailedApp { - sourceData.AppRenderFailed = append(sourceData.AppRenderFailed, failedAppData) - glog.V(3).Infof("Added render failed app for user=%s, source=%s, app=%s, reason=%s", + if failedAppData == nil || failedAppData.RawData == nil { + glog.Errorf("Invalid render failed data: nil failed app or raw data for user=%s, source=%s", userID, sourceID) + return fmt.Errorf("invalid render failed data: nil 
failed app or raw data") + } + + replaced := false + for i, existing := range sourceData.AppRenderFailed { + if existing == nil || existing.RawData == nil { + continue + } + matchedByID := (failedAppData.RawData.ID != "" && existing.RawData.ID == failedAppData.RawData.ID) || + (failedAppData.RawData.AppID != "" && existing.RawData.AppID == failedAppData.RawData.AppID) + matchedByName := failedAppData.RawData.Name != "" && existing.RawData.Name == failedAppData.RawData.Name + if matchedByID || matchedByName { + sourceData.AppRenderFailed[i] = failedAppData + replaced = true + break + } + } + + if !replaced { + sourceData.AppRenderFailed = append(sourceData.AppRenderFailed, failedAppData) + } + glog.V(3).Infof("Upserted render failed app for user=%s, source=%s, app=%s, reason=%s", userID, sourceID, failedAppData.RawData.AppID, failedAppData.FailureReason) } else { glog.Errorf("Invalid render failed data format for user=%s, source=%s", userID, sourceID) From 9a8b3340a6c68f2c9eb93ccf2ef39777842f0f13 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Mar 2026 11:25:59 +0000 Subject: [PATCH 38/45] Allow version upgrades past failed-list gating Co-authored-by: aby913 --- internal/v2/appinfo/cache.go | 37 ++++++++++++++++++++++---------- internal/v2/appinfo/hydration.go | 7 +++--- internal/v2/appinfo/pipeline.go | 11 +++++----- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 0c7ca3b..0a2019e 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -475,7 +475,8 @@ func (cm *CacheManager) IsAppInLatestQueue(userID, sourceID, appID, version stri } // IsAppInRenderFailedList checks if an app exists in the render failed list. -func (cm *CacheManager) IsAppInRenderFailedList(userID, sourceID, appID string) bool { +// When version is provided, only same-version failures will be treated as a match. 
+func (cm *CacheManager) IsAppInRenderFailedList(userID, sourceID, appID, appName, version string) bool { cm.mutex.RLock() defer cm.mutex.RUnlock() @@ -488,10 +489,27 @@ func (cm *CacheManager) IsAppInRenderFailedList(userID, sourceID, appID string) return false } for _, fd := range sourceData.AppRenderFailed { - if fd.RawData != nil && - (fd.RawData.ID == appID || fd.RawData.AppID == appID || fd.RawData.Name == appID) { - return true + if fd == nil || fd.RawData == nil { + continue + } + + matchedByID := appID != "" && (fd.RawData.ID == appID || fd.RawData.AppID == appID || fd.RawData.Name == appID) + matchedByName := appName != "" && fd.RawData.Name == appName + if !matchedByID && !matchedByName { + continue + } + + // If incoming version is known, only block when failed record has the same known version. + if version != "" { + failedVersion := fd.Version + if failedVersion == "" { + failedVersion = fd.RawData.Version + } + if failedVersion == "" || failedVersion != version { + continue + } } + return true } return false } @@ -1435,19 +1453,16 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App } } - // Skip app when the same app-version is already in render-failed. - // If either side has empty version, still skip to prevent Pending/Failed overlap. + // Skip app only when the same app-version is already in render-failed. + // Unknown versions should not block upgrades/new retries. 
matchFailed := func(key string) bool { - if key == "" { + if key == "" || incomingVersion == "" { return false } failedVersion, ok := failedVersionMap[key] - if !ok { + if !ok || failedVersion == "" { return false } - if failedVersion == "" || incomingVersion == "" { - return true - } return failedVersion == incomingVersion } if matchFailed(appData.RawData.Name) || matchFailed(appData.RawData.AppID) || matchFailed(appData.RawData.ID) { diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index 9be6108..93c5a3d 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -1003,11 +1003,12 @@ func (h *Hydrator) isAppInLatestQueue(userID, sourceID, appID, appName, version return result } -// isAppInRenderFailedList checks if an app already exists in the render failed list -func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName string) bool { +// isAppInRenderFailedList checks if an app already exists in the render failed list. +// When version is provided, only same-version failure will block hydration. 
+func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName, version string) bool { if h.cacheManager == nil { glog.V(2).Infof("Warning: CacheManager not available for isAppInRenderFailedList") return false } - return h.cacheManager.IsAppInRenderFailedList(userID, sourceID, appID) + return h.cacheManager.IsAppInRenderFailedList(userID, sourceID, appID, appName, version) } diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 30836d8..20b36e4 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -350,7 +350,12 @@ func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string return false } - if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { + version := "" + if pendingData.RawData != nil { + version = pendingData.RawData.Version + } + + if h.isAppInRenderFailedList(userID, sourceID, appID, appName, version) { glog.V(2).Infof("HydrateSingleApp: skipping %s(%s) (user=%s, source=%s) - in render failed list, will retry after cleanup", appID, appName, userID, sourceID) return false @@ -360,10 +365,6 @@ func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string return true } - version := "" - if pendingData.RawData != nil { - version = pendingData.RawData.Version - } if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { glog.V(2).Infof("HydrateSingleApp: skipping %s(%s) (user=%s, source=%s) - already in latest queue with version %s", appID, appName, userID, sourceID, version) From 5b8333a4f048b67e2087f4565884227f65c68381 Mon Sep 17 00:00:00 2001 From: aby913 Date: Fri, 13 Mar 2026 19:54:50 +0800 Subject: [PATCH 39/45] refactor: add more logs --- internal/v2/appinfo/appinfomodule.go | 4 +-- internal/v2/appinfo/cache.go | 10 +++++- .../v2/appinfo/status_correction_check.go | 36 +++++++++---------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/internal/v2/appinfo/appinfomodule.go 
b/internal/v2/appinfo/appinfomodule.go index ee23a08..fb8e491 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -646,7 +646,7 @@ func (m *AppInfoModule) initDataWatcherUser() error { // initDataWatcherRepo initializes the DataWatcherRepo func (m *AppInfoModule) initDataWatcherRepo() error { - glog.V(3).Info("Initializing DataWatcherRepo...") + glog.V(2).Info("Initializing DataWatcherRepo...") if m.redisClient == nil { return fmt.Errorf("redis client is required for DataWatcherRepo") @@ -669,7 +669,7 @@ func (m *AppInfoModule) initDataWatcherRepo() error { // initStatusCorrectionChecker initializes the StatusCorrectionChecker func (m *AppInfoModule) initStatusCorrectionChecker() error { - glog.V(3).Info("Initializing StatusCorrectionChecker...") + glog.V(2).Info("Initializing StatusCorrectionChecker...") if m.cacheManager == nil { return fmt.Errorf("cache manager is required for StatusCorrectionChecker") diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index 0a2019e..0ffc785 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -2767,12 +2767,20 @@ func (cm *CacheManager) GetCachedData() string { var pendings []string if len(sv.AppInfoLatestPending) < 10 { for _, pending := range sv.AppInfoLatestPending { - pendings = append(pendings, fmt.Sprintf("%s_%s_%s", sn, pending.AppInfo.AppEntry.Name, pending.AppInfo.AppEntry.Version)) + pendings = append(pendings, fmt.Sprintf("%s_%s", pending.AppInfo.AppEntry.Name, pending.AppInfo.AppEntry.Version)) } } apps["pending_apps"] = pendings apps["failed"] = len(sv.AppRenderFailed) + var failes []string + if len(sv.AppRenderFailed) < 5 { + for _, fail := range sv.AppRenderFailed { + failes = append(failes, fmt.Sprintf("%s_%s", fail.AppInfo.AppEntry.Name, fail.AppInfo.AppEntry.Version)) + } + } + apps["failed_apps"] = failes + apps["history"] = len(sv.AppInfoHistory) apps["state"] = len(sv.AppStateLatest) var status []string diff --git 
a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index ce8f3c1..1cbe5f9 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -214,26 +214,26 @@ func (scc *StatusCorrectionChecker) performStatusCheck() map[string]bool { latestStatus, err := scc.fetchLatestStatus() if err != nil { - glog.Errorf("Failed to fetch latest status from app-service: %v", err) + glog.Errorf("[UserChanged] Failed to fetch latest status from app-service: %v", err) return result } - glog.V(2).Infof("Fetched status for %d applications and middlewares from app-service", len(latestStatus)) + glog.V(2).Infof("[UserChanged] Fetched status for %d applications and middlewares from app-service: %s", len(latestStatus), utils.ParseJson(latestStatus)) cachedStatus := scc.getCachedStatus() if len(cachedStatus) == 0 { - glog.Infof("No cached status found, skipping comparison") + glog.Error("[UserChanged] No cached status found, skipping comparison") return result } - glog.V(2).Infof("Found cached status for %d applications and middlewares", len(cachedStatus)) + glog.V(2).Infof("[UserChanged] Found cached status for %d applications and middlewares: %s", len(cachedStatus), utils.ParseJson(cachedStatus)) changes := scc.compareStatus(latestStatus, cachedStatus) - glog.V(2).Infof("[UserChanged] Found cached status, changed: %+v", changes) + glog.V(2).Infof("[UserChanged] Found cached status, changed: %+v, app: %d, middlewares: %d", changes, len(latestStatus), len(cachedStatus)) if len(changes) > 0 { - glog.V(2).Infof("Detected %d status changes, applying corrections", len(changes)) + glog.V(2).Infof("[UserChanged] Detected %d status changes, applying corrections, changes: %s", len(changes), utils.ParseJson(changes)) scc.applyCorrections(changes, latestStatus) // Apply UserInfo changes and collect affected users. 
@@ -1340,7 +1340,7 @@ func (scc *StatusCorrectionChecker) checkAndCorrectTaskStatuses(latestStatus []u return } - glog.Infof("Checking %d running tasks for status correction", len(runningTasks)) + glog.Infof("[SCC] Checking %d running tasks for status correction", len(runningTasks)) // Create a map of app statuses for quick lookup: user:appName -> app status appStatusMap := make(map[string]*utils.AppServiceResponse) @@ -1371,39 +1371,39 @@ func (scc *StatusCorrectionChecker) checkAndCorrectTaskStatuses(latestStatus []u if runningTask.Type == task.CloneApp { taskTypeStr = "Clone" } - glog.Infof("Task status correction: %s task %s for app %s (user: %s) should be completed - app is running", + glog.Infof("[SCC] Task status correction: %s task %s for app %s (user: %s) should be completed - app is running", taskTypeStr, runningTask.ID, runningTask.AppName, runningTask.User) if err := scc.taskModule.InstallTaskSucceed(runningTask.OpID, runningTask.AppName, runningTask.User); err != nil { - glog.Warningf("Failed to mark %s task as succeeded: %v", taskTypeStr, err) + glog.Warningf("[SCC] Failed to mark %s task as succeeded: %v", taskTypeStr, err) } else { correctedCount++ - glog.Infof("Successfully corrected %s task status: %s", taskTypeStr, runningTask.ID) + glog.Infof("[SCC] Successfully corrected %s task status: %s", taskTypeStr, runningTask.ID) } } case task.UninstallApp: // For uninstall tasks: if app doesn't exist, mark task as completed if !exists { - glog.Infof("Task status correction: Uninstall task %s for app %s (user: %s) should be completed - app no longer exists", + glog.Infof("[SCC] Task status correction: Uninstall task %s for app %s (user: %s) should be completed - app no longer exists", runningTask.ID, runningTask.AppName, runningTask.User) if err := scc.taskModule.UninstallTaskSucceed(runningTask.OpID, runningTask.AppName, runningTask.User); err != nil { - glog.Warningf("Failed to mark uninstall task as succeeded: %v", err) + glog.Warningf("[SCC] Failed 
to mark uninstall task as succeeded: %v", err) } else { correctedCount++ - glog.Infof("Successfully corrected uninstall task status: %s", runningTask.ID) + glog.Infof("[SCC] Successfully corrected uninstall task status: %s", runningTask.ID) } } case task.CancelAppInstall: // For cancel install tasks: if app doesn't exist, mark task as completed if !exists { - glog.Infof("Task status correction: Cancel install task %s for app %s (user: %s) should be completed - app no longer exists", + glog.Infof("[SCC] Task status correction: Cancel install task %s for app %s (user: %s) should be completed - app no longer exists", runningTask.ID, runningTask.AppName, runningTask.User) if err := scc.taskModule.CancelInstallTaskSucceed(runningTask.OpID, runningTask.AppName, runningTask.User); err != nil { - glog.Warningf("Failed to mark cancel install task as succeeded: %v", err) + glog.Warningf("[SCC] Failed to mark cancel install task as succeeded: %v", err) } else { correctedCount++ - glog.Infof("Successfully corrected cancel install task status: %s", runningTask.ID) + glog.Infof("[SCC] Successfully corrected cancel install task status: %s", runningTask.ID) } } @@ -1413,14 +1413,14 @@ func (scc *StatusCorrectionChecker) checkAndCorrectTaskStatuses(latestStatus []u // are typically completed through their normal execution flow. // We log it for monitoring but don't auto-correct to avoid conflicts. 
if exists && appStatus != nil && appStatus.Status.State == "running" { - glog.Infof("Task status correction: Upgrade task %s for app %s (user: %s) appears completed - app is running (not auto-correcting)", + glog.Infof("[SCC] Task status correction: Upgrade task %s for app %s (user: %s) appears completed - app is running (not auto-correcting)", runningTask.ID, runningTask.AppName, runningTask.User) } } } if correctedCount > 0 { - glog.Infof("Task status correction completed: corrected %d task(s)", correctedCount) + glog.Infof("[SCC] Task status correction completed: corrected %d task(s)", correctedCount) scc.mutex.Lock() scc.correctionCount += int64(correctedCount) scc.mutex.Unlock() From 6e991fb8f839eb718ae3caad6d6ebeb0183ff1be Mon Sep 17 00:00:00 2001 From: aby913 Date: Fri, 13 Mar 2026 21:04:53 +0800 Subject: [PATCH 40/45] fix: no push when Resuming app --- internal/v2/appinfo/datawatcher_state.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index febecc0..d798bb9 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -557,6 +557,7 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // + case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "pending": case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": case appStateMsg.State == "installingCanceled" && appState.Status.State == "installingCanceling": + case appStateMsg.State == "running" && appState.Status.State == "resuming": default: // state = downloading if len(appStateMsg.EntranceStatuses) == 0 && appState.Status.Progress == appStateMsg.Progress { From 6e967e383fafea5c517e2699ca055ecce58a8711 Mon Sep 17 00:00:00 2001 From: aby913 Date: Wed, 18 Mar 2026 16:45:16 +0800 Subject: [PATCH 41/45] fix: add nats message field --- internal/v2/appinfo/datawatcher_state.go | 9 +++++++++ 
internal/v2/appinfo/status_correction_check.go | 2 ++ internal/v2/types/types.go | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index d798bb9..9679b25 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -62,6 +62,8 @@ type AppStateMessage struct { User string `json:"user"` Progress string `json:"progress"` MarketSource string `json:"marketSource"` + Reason string `json:"reason"` + Message string `json:"message"` EntranceStatuses []EntranceStatus `json:"entranceStatuses"` SharedEntrances []SharedEntrance `json:"sharedEntrances,omitempty"` } @@ -558,6 +560,9 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // + case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": case appStateMsg.State == "installingCanceled" && appState.Status.State == "installingCanceling": case appStateMsg.State == "running" && appState.Status.State == "resuming": + case appStateMsg.State == "stopped" && appState.Status.State == "resuming": + case appStateMsg.State == "installingCanceled" && appState.Status.State == "resuming": + case appStateMsg.State == "stopped" && appState.Status.State == "stopping": default: // state = downloading if len(appStateMsg.EntranceStatuses) == 0 && appState.Status.Progress == appStateMsg.Progress { @@ -804,6 +809,8 @@ func (dw *DataWatcherState) storeStateToCache(msg AppStateMessage) { "rawAppName": msg.RawAppName, // Add raw app name for clone app support "title": msg.Title, // Add title from message "opType": msg.OpType, // Add operation type from message + "message": msg.Message, + "reason": msg.Reason, } // Add SharedEntrances if present @@ -1078,6 +1085,8 @@ func (dw *DataWatcherState) printAppStateMessage(msg AppStateMessage) { glog.V(2).Infof("Operation Type: %s", msg.OpType) glog.V(2).Infof("Operation ID: %s", msg.OpID) glog.V(2).Infof("User: %s", 
msg.User) + glog.V(2).Infof("Reason: %s", msg.Reason) + glog.V(2).Infof("Message: %s", msg.Message) glog.V(2).Info("Entrance Statuses:") for i, status := range msg.EntranceStatuses { glog.V(2).Infof(" [%d] Name: %s, State: %s, Status Time: %s, Reason: %s", diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index 1cbe5f9..d7a3b1a 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -1166,6 +1166,8 @@ func (scc *StatusCorrectionChecker) createAppStateDataFromResponse(app utils.App LastTransitionTime string `json:"lastTransitionTime"` Progress string `json:"progress"` OpType string `json:"opType,omitempty"` + Message string `json:"message"` + Reason string `json:"reason"` EntranceStatuses []struct { ID string `json:"id"` Name string `json:"name"` diff --git a/internal/v2/types/types.go b/internal/v2/types/types.go index 6a11ff1..56fe7b8 100644 --- a/internal/v2/types/types.go +++ b/internal/v2/types/types.go @@ -167,6 +167,8 @@ type AppStateLatestData struct { LastTransitionTime string `json:"lastTransitionTime"` Progress string `json:"progress"` OpType string `json:"opType,omitempty"` // Operation type: install, upgrade, uninstall, etc. + Message string `json:"message"` + Reason string `json:"reason"` EntranceStatuses []struct { ID string `json:"id"` // ID extracted from URL's first segment after splitting by "." 
Name string `json:"name"` @@ -576,6 +578,7 @@ type AppInfoUpdate struct { AppName string `json:"app_name"` // App name NotifyType string `json:"notify_type"` // Notify type Source string `json:"source"` // Source + Message string `json:"message"` } type MarketSystemUpdate struct { @@ -713,6 +716,9 @@ func NewAppStateLatestData(data map[string]interface{}, userID string, getInfoFu Invisible bool `json:"invisible"` } + var statusReason = "" + var statusMessage = "" + // Extract name from various possible fields if nameVal, ok := data["name"].(string); ok && nameVal != "" { name = nameVal @@ -757,6 +763,14 @@ func NewAppStateLatestData(data map[string]interface{}, userID string, getInfoFu if opTypeVal, ok := data["opType"].(string); ok { opType = opTypeVal } + // Extract reason from data + if reasonVal, ok := data["reason"]; ok && reasonVal != nil { + statusReason = reasonVal.(string) + } + // Extract message from data + if messageVal, ok := data["message"]; ok && messageVal != nil { + statusMessage = messageVal.(string) + } // Extract SharedEntrances var sharedEntrances []struct { @@ -1033,6 +1047,8 @@ func NewAppStateLatestData(data map[string]interface{}, userID string, getInfoFu LastTransitionTime string `json:"lastTransitionTime"` Progress string `json:"progress"` OpType string `json:"opType,omitempty"` + Message string `json:"message"` + Reason string `json:"reason"` EntranceStatuses []struct { ID string `json:"id"` Name string `json:"name"` @@ -1065,6 +1081,8 @@ func NewAppStateLatestData(data map[string]interface{}, userID string, getInfoFu LastTransitionTime: lastTransitionTime, Progress: progress, OpType: opType, + Message: statusMessage, + Reason: statusReason, EntranceStatuses: entranceStatuses, SharedEntrances: sharedEntrances, }, From 5d5e53a182d7238fb24febe72efcddb42c246afa Mon Sep 17 00:00:00 2001 From: aby913 Date: Fri, 20 Mar 2026 13:21:21 +0800 Subject: [PATCH 42/45] fix: add pendingCanceled status check when deleting app --- 
internal/v2/appinfo/datawatcher_state.go | 1 + pkg/v2/api/app.go | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index 9679b25..a034e9c 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -555,6 +555,7 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // + case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCanceling": case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCancelFailed": case appStateMsg.State == "pendingCanceled" && appState.Status.State == "pending": + case appStateMsg.State == "stopped" && appState.Status.State == "pending": case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "downloadingCanceling": case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "pending": case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": diff --git a/pkg/v2/api/app.go b/pkg/v2/api/app.go index 6187c4b..f5d26e1 100644 --- a/pkg/v2/api/app.go +++ b/pkg/v2/api/app.go @@ -2107,7 +2107,8 @@ func (s *Server) deleteLocalApp(w http.ResponseWriter, r *http.Request) { if matchesApp { // Treat installFailed same as uninstalled so deletion can proceed - if appState.Status.State != "uninstalled" && appState.Status.State != "installFailed" && appState.Status.State != "downloadFailed" && appState.Status.State != "installingCanceled" && appState.Status.State != "downloadingCanceled" { + glog.Infof("App %s state: %s", request.AppName, appState.Status.State) + if appState.Status.State != "uninstalled" && appState.Status.State != "installFailed" && appState.Status.State != "downloadFailed" && appState.Status.State != "installingCanceled" && appState.Status.State != "downloadingCanceled" && appState.Status.State != "pendingCanceled" { appInstalled = true glog.V(2).Infof("App %s (or its clone 
%s) is still installed in upload source with state: %s", request.AppName, installedAppName, appState.Status.State) From 4e83cbf7a4444d26b53f4890d03cb089d255224e Mon Sep 17 00:00:00 2001 From: aby913 Date: Fri, 20 Mar 2026 13:21:54 +0800 Subject: [PATCH 43/45] fix: complete rawAppName field content --- pkg/v2/api/app.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pkg/v2/api/app.go b/pkg/v2/api/app.go index f5d26e1..e27ac28 100644 --- a/pkg/v2/api/app.go +++ b/pkg/v2/api/app.go @@ -1089,6 +1089,8 @@ func (s *Server) convertSourceDataToFiltered(sourceData *types.SourceData) *Filt } } + s.patchStateRawName(sourceData) + filteredSourceData := &FilteredSourceData{ Type: sourceData.Type, AppStateLatest: sourceData.AppStateLatest, @@ -1196,6 +1198,8 @@ func (s *Server) convertSourceDataToFilteredForState(sourceData *types.SourceDat return nil } + s.patchStateRawName(sourceData) + filteredSourceData := &FilteredSourceDataForState{ Type: sourceData.Type, AppStateLatest: sourceData.AppStateLatest, @@ -2788,3 +2792,22 @@ func (s *Server) resendPaymentVC(w http.ResponseWriter, r *http.Request) { "product_id": productID, }) } + +// helper +func (s *Server) patchStateRawName(sourceData *types.SourceData) { + if len(sourceData.AppStateLatest) == 0 || len(sourceData.AppInfoLatest) == 0 { + return + } + + for _, state := range sourceData.AppStateLatest { + if state.Status.RawAppName == "" { + for _, app := range sourceData.AppInfoLatest { + if strings.HasPrefix(state.Status.Name, app.AppInfo.AppEntry.Name) { + state.Status.RawAppName = app.AppInfo.AppEntry.Name + glog.Infof("[PATCH] App state: %s, app: %s", state.Status.Name, app.AppInfo.AppEntry.Name) + break + } + } + } + } +} From c449fd9d6623cb2464e20eb3c391dac12bf67108 Mon Sep 17 00:00:00 2001 From: aby913 Date: Mon, 23 Mar 2026 11:38:47 +0800 Subject: [PATCH 44/45] fix: add logs --- internal/v2/appinfo/pipeline.go | 4 ++-- .../v2/appinfo/status_correction_check.go | 22 ++++++++++++++++--- 2 files 
changed, 21 insertions(+), 5 deletions(-) diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go index 20b36e4..e647a91 100644 --- a/internal/v2/appinfo/pipeline.go +++ b/internal/v2/appinfo/pipeline.go @@ -175,8 +175,8 @@ func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { return affectedUsers } - // count := p.cacheManager.RestoreRetryableFailedToPending(20) - // glog.Infof("Pipeline Phase 2: restore %d Failed to Pending", count) + count := p.cacheManager.RestoreRetryableFailedToPending(20) + glog.Infof("Pipeline Phase 2: restore %d Failed to Pending", count) items := p.cacheManager.CollectAllPendingItems() diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index d7a3b1a..1bd8e15 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -218,7 +218,7 @@ func (scc *StatusCorrectionChecker) performStatusCheck() map[string]bool { return result } - glog.V(2).Infof("[UserChanged] Fetched status for %d applications and middlewares from app-service: %s", len(latestStatus), utils.ParseJson(latestStatus)) + glog.V(3).Infof("[UserChanged] Fetched status for %d applications and middlewares from app-service: %s", len(latestStatus), utils.ParseJson(latestStatus)) cachedStatus := scc.getCachedStatus() if len(cachedStatus) == 0 { @@ -226,7 +226,7 @@ func (scc *StatusCorrectionChecker) performStatusCheck() map[string]bool { return result } - glog.V(2).Infof("[UserChanged] Found cached status for %d applications and middlewares: %s", len(cachedStatus), utils.ParseJson(cachedStatus)) + glog.V(3).Infof("[UserChanged] Found cached status for %d applications and middlewares: %s", len(cachedStatus), utils.ParseJson(cachedStatus)) changes := scc.compareStatus(latestStatus, cachedStatus) @@ -293,7 +293,14 @@ func (scc *StatusCorrectionChecker) fetchLatestStatus() ([]utils.AppServiceRespo return appsStatus, nil } - // 
glog.Infof("[SCC] appStatus: %s, middlewareStatus: %s", utils.ParseJson(appsStatus), utils.ParseJson(middlewaresStatus)) + var printf []interface{} + for _, md := range appsStatus { + if md.Spec.Name != "olares-app" { + printf = append(printf, md) + } + } + + glog.Infof("[SCC] fetch latest appStatus: %s", utils.ParseJson(printf)) // Combine apps and middlewares status // Convert middlewares to AppServiceResponse format and merge with apps @@ -521,6 +528,15 @@ func (scc *StatusCorrectionChecker) getCachedStatus() map[string]*types.AppState } } + var printf = make(map[string]interface{}) + for k, v := range cachedStatus { + if !strings.HasSuffix(k, "olares-app") { + printf[k] = v + } + } + + glog.Infof("[SCC] fetch cached appStatus: %s", utils.ParseJson(printf)) + return cachedStatus } From 1816ebc08c2235129031d078493b469883d7dbe2 Mon Sep 17 00:00:00 2001 From: dkeven Date: Fri, 20 Mar 2026 14:03:39 +0800 Subject: [PATCH 45/45] fix: separate param for different local sources (cherry picked from commit 8b9911043a9ba43170e7a928b070e750a773d9b8) --- Dockerfile.server | 4 +-- internal/v2/appinfo/localrepo.go | 14 +++++++---- pkg/v2/api/app.go | 43 +++++++++++++++++++------------- 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/Dockerfile.server b/Dockerfile.server index d19c508..8239670 100644 --- a/Dockerfile.server +++ b/Dockerfile.server @@ -35,8 +35,8 @@ RUN cd bytetrade.io/web3os/market && \ # Use distroless as minimal base image to package the manager binary # Refer to https://github.com/GoogleContainerTools/distroless for more details -FROM golang:1.23.11-alpine +FROM alpine:3.23 WORKDIR /opt/app COPY --from=builder /workspace/bytetrade.io/web3os/market/market . 
-CMD ["/opt/app/market", "-v", "2", "--logtostderr"] +ENTRYPOINT ["/opt/app/market", "-v", "2", "--logtostderr"] diff --git a/internal/v2/appinfo/localrepo.go b/internal/v2/appinfo/localrepo.go index 5208770..8e5bfb1 100644 --- a/internal/v2/appinfo/localrepo.go +++ b/internal/v2/appinfo/localrepo.go @@ -268,7 +268,7 @@ func (lr *LocalRepo) UploadAppPackage(userID, sourceID string, fileBytes []byte, } // Add source field - if err := writer.WriteField("source", "upload"); err != nil { + if err := writer.WriteField("source", sourceID); err != nil { return nil, fmt.Errorf("failed to write source field: %w", err) } @@ -353,13 +353,17 @@ func (lr *LocalRepo) UploadAppPackage(userID, sourceID string, fileBytes []byte, return latest.RawData, nil } -func (lr *LocalRepo) DeleteApp(userID, appName, appVersion string, token string) error { - glog.V(2).Infof("Deleting app: %s, version: %s, user: %s", appName, appVersion, userID) +func (lr *LocalRepo) DeleteApp(userID, appName, appVersion, sourceID string, token string) error { + glog.V(2).Infof("Deleting app: %s, version: %s, source: %s, user: %s", appName, appVersion, sourceID, userID) + + if sourceID == "" { + sourceID = "upload" + } // Check if the app is currently being installed if lr.taskModule != nil { taskType, source, found, completed := lr.taskModule.GetLatestTaskByAppNameAndUser(appName, userID) - if found && !completed && taskType == "install" && source == "upload" { + if found && !completed && taskType == "install" && source == sourceID { glog.V(3).Infof("Cannot delete app %s: app is currently being installed (task type: %s, source: %s)", appName, taskType, source) return fmt.Errorf("cannot delete app %s: app is currently being installed", appName) } @@ -381,7 +385,7 @@ func (lr *LocalRepo) DeleteApp(userID, appName, appVersion string, token string) bodyMap := map[string]string{ "app_name": appName, "app_version": appVersion, - "source_id": "upload", + "source_id": sourceID, } bodyBytes, err := 
json.Marshal(bodyMap) if err != nil { diff --git a/pkg/v2/api/app.go b/pkg/v2/api/app.go index e27ac28..00dc3b8 100644 --- a/pkg/v2/api/app.go +++ b/pkg/v2/api/app.go @@ -1973,6 +1973,7 @@ func (s *Server) createSafeTagCopy(tag *types.Tag) map[string]interface{} { type DeleteLocalAppRequest struct { AppName string `json:"app_name"` AppVersion string `json:"app_version"` + Source string `json:"source"` } // deleteLocalApp handles DELETE /api/v2/apps/delete @@ -2028,9 +2029,15 @@ func (s *Server) deleteLocalApp(w http.ResponseWriter, r *http.Request) { return } - glog.V(2).Infof("Received delete request for app: %s, version: %s from user: %s", request.AppName, request.AppVersion, userID) + // Default source_id to "upload" for backward compatibility + sourceID := request.Source + if sourceID == "" { + sourceID = "upload" + } + + glog.V(2).Infof("Received delete request for app: %s, version: %s, source: %s from user: %s", request.AppName, request.AppVersion, sourceID, userID) - // Step 5: Check if app exists in upload source + // Step 5: Check if app exists in the specified source userData := s.cacheManager.GetUserData(userID) if userData == nil { glog.V(3).Infof("User data not found for user: %s", userID) @@ -2038,10 +2045,10 @@ func (s *Server) deleteLocalApp(w http.ResponseWriter, r *http.Request) { return } - sourceData, exists := userData.Sources["upload"] + sourceData, exists := userData.Sources[sourceID] if !exists { - glog.V(3).Infof("upload source not found for user: %s", userID) - s.sendResponse(w, http.StatusNotFound, false, "upload source not found", nil) + glog.V(3).Infof("Source '%s' not found for user: %s", sourceID, userID) + s.sendResponse(w, http.StatusNotFound, false, fmt.Sprintf("Source '%s' not found", sourceID), nil) return } @@ -2079,18 +2086,18 @@ func (s *Server) deleteLocalApp(w http.ResponseWriter, r *http.Request) { } if !appExists { - glog.V(3).Infof("App %s version %s not found in upload source for user: %s", request.AppName, 
request.AppVersion, userID) - s.sendResponse(w, http.StatusNotFound, false, "App not found in upload source", nil) + glog.V(3).Infof("App %s version %s not found in source '%s' for user: %s", request.AppName, request.AppVersion, sourceID, userID) + s.sendResponse(w, http.StatusNotFound, false, fmt.Sprintf("App not found in source '%s'", sourceID), nil) return } - // Step 6.5: Check if app is uninstalled in upload source (including clone apps) + // Step 6.5: Check if app is uninstalled in the source (including clone apps) // Only allow deletion if the app is uninstalled or not found in AppStateLatest - glog.V(3).Infof("Checking if app %s is uninstalled in upload source for user: %s", request.AppName, userID) + glog.V(3).Infof("Checking if app %s is uninstalled in source '%s' for user: %s", request.AppName, sourceID, userID) appInstalled := false var installedAppName string - // Check upload source for installed instances of this app + // Check the source for installed instances of this app if sourceData.AppStateLatest != nil { for _, appState := range sourceData.AppStateLatest { if appState == nil { @@ -2114,8 +2121,8 @@ func (s *Server) deleteLocalApp(w http.ResponseWriter, r *http.Request) { glog.Infof("App %s state: %s", request.AppName, appState.Status.State) if appState.Status.State != "uninstalled" && appState.Status.State != "installFailed" && appState.Status.State != "downloadFailed" && appState.Status.State != "installingCanceled" && appState.Status.State != "downloadingCanceled" && appState.Status.State != "pendingCanceled" { appInstalled = true - glog.V(2).Infof("App %s (or its clone %s) is still installed in upload source with state: %s", - request.AppName, installedAppName, appState.Status.State) + glog.V(2).Infof("App %s (or its clone %s) is still installed in source '%s' with state: %s", + request.AppName, installedAppName, sourceID, appState.Status.State) break } } @@ -2123,19 +2130,19 @@ func (s *Server) deleteLocalApp(w http.ResponseWriter, r 
*http.Request) { } if appInstalled { - glog.V(2).Infof("Cannot delete app %s: app (or its clone %s) is still installed in upload source", - request.AppName, installedAppName) + glog.V(2).Infof("Cannot delete app %s: app (or its clone %s) is still installed in source '%s'", + request.AppName, installedAppName, sourceID) s.sendResponse(w, http.StatusBadRequest, false, fmt.Sprintf("Cannot delete app: app (or its clone %s) is still installed. Please uninstall it first.", installedAppName), nil) return } - glog.V(3).Infof("App %s is uninstalled in upload source, proceeding with deletion", request.AppName) + glog.V(3).Infof("App %s is uninstalled in source '%s', proceeding with deletion", request.AppName, sourceID) // Step 7: Delete chart files using LocalRepo // Delete chart package file - if err := s.localRepo.DeleteApp(userID, request.AppName, request.AppVersion, token); err != nil { + if err := s.localRepo.DeleteApp(userID, request.AppName, request.AppVersion, sourceID, token); err != nil { glog.Errorf("Failed to delete chart package: %v", err) // Continue with deletion even if chart file doesn't exist s.sendResponse(w, http.StatusInternalServerError, false, "Failed to delete chart package", nil) @@ -2143,13 +2150,13 @@ func (s *Server) deleteLocalApp(w http.ResponseWriter, r *http.Request) { } // Step 8: Remove app from AppStateLatest - if err := s.cacheManager.RemoveAppStateData(userID, "upload", request.AppName); err != nil { + if err := s.cacheManager.RemoveAppStateData(userID, sourceID, request.AppName); err != nil { glog.Errorf("Failed to remove app from AppStateLatest: %v", err) // Continue with deletion even if app state doesn't exist } // Step 9: Remove app from AppInfoLatest - if err := s.cacheManager.RemoveAppInfoLatestData(userID, "upload", request.AppName); err != nil { + if err := s.cacheManager.RemoveAppInfoLatestData(userID, sourceID, request.AppName); err != nil { glog.Errorf("Failed to remove app from AppInfoLatest: %v", err) s.sendResponse(w, 
http.StatusInternalServerError, false, "Failed to remove app from cache", nil) return