diff --git a/internal/v2/appinfo/appinfomodule.go b/internal/v2/appinfo/appinfomodule.go index e3ba9e2..fb8e491 100644 --- a/internal/v2/appinfo/appinfomodule.go +++ b/internal/v2/appinfo/appinfomodule.go @@ -26,10 +26,11 @@ type AppInfoModule struct { redisClient *RedisClient syncer *Syncer hydrator *Hydrator + pipeline *Pipeline dataWatcher *DataWatcher dataWatcherState *DataWatcherState dataWatcherUser *DataWatcherUser - dataWatcherRepo *DataWatcherRepo // Add DataWatcherRepo for image info updates + dataWatcherRepo *DataWatcherRepo dataSender *DataSender statusCorrectionChecker *StatusCorrectionChecker settingsManager *settings.SettingsManager @@ -223,10 +224,29 @@ func (m *AppInfoModule) Start() error { } } - // Set up hydration notifier connection if both cache and hydrator are enabled - if m.config.EnableCache && m.config.EnableHydrator && m.cacheManager != nil && m.hydrator != nil { - m.cacheManager.SetHydrationNotifier(m.hydrator) - glog.Infof("Hydration notifier connection established between cache manager and hydrator") + // Create and start Pipeline to orchestrate all components serially + if m.config.EnableHydrator && m.cacheManager != nil { + p := NewPipeline(m.cacheManager, m.cacheManager.cache, 30*time.Second) + if m.syncer != nil { + p.SetSyncer(m.syncer) + } + if m.hydrator != nil { + p.SetHydrator(m.hydrator) + } + if m.dataWatcher != nil { + p.SetDataWatcher(m.dataWatcher) + } + if m.dataWatcherRepo != nil { + p.SetDataWatcherRepo(m.dataWatcherRepo) + } + if m.statusCorrectionChecker != nil { + p.SetStatusCorrectionChecker(m.statusCorrectionChecker) + } + if err := p.Start(m.ctx); err != nil { + return fmt.Errorf("failed to start Pipeline: %w", err) + } + m.pipeline = p + glog.Infof("Pipeline started, all components orchestrated serially") } m.isStarted = true @@ -245,7 +265,11 @@ func (m *AppInfoModule) Stop() error { glog.V(3).Info("Stopping AppInfo module...") - // Stop components in reverse order + // Stop Pipeline first (it orchestrates 
other components) + if m.pipeline != nil { + m.pipeline.Stop() + } + if m.hydrator != nil { m.hydrator.Stop() } @@ -378,25 +402,14 @@ func (m *AppInfoModule) GetRedisConfig() *RedisConfig { // IsStarted returns whether the module is currently running func (m *AppInfoModule) IsStarted() bool { - // Boolean read is atomic, but we need to ensure consistency with Start/Stop operations - if !m.mutex.TryRLock() { - glog.Warning("[TryRLock] AppInfoModule.IsStarted: Read lock not available, returning false") - return false - } + m.mutex.RLock() defer m.mutex.RUnlock() return m.isStarted } // GetModuleStatus returns the current status of the module and all components func (m *AppInfoModule) GetModuleStatus() map[string]interface{} { - // Need read lock to ensure consistent snapshot of all component states - if !m.mutex.TryRLock() { - glog.Warning("[TryRLock] AppInfoModule.GetModuleStatus: Read lock not available, returning error status") - return map[string]interface{}{ - "error": "lock not available", - "status": "unknown", - } - } + m.mutex.RLock() defer m.mutex.RUnlock() status := map[string]interface{}{ @@ -517,12 +530,12 @@ func (m *AppInfoModule) initSyncer() error { glog.V(3).Info("Cache manager reference set in syncer for hydration notifications") } - // Start syncer - if err := m.syncer.Start(m.ctx); err != nil { + // Start syncer in passive mode (Pipeline handles scheduling) + if err := m.syncer.StartWithOptions(m.ctx, false); err != nil { return fmt.Errorf("failed to start syncer: %w", err) } - glog.V(2).Info("Syncer initialized successfully") + glog.V(2).Info("Syncer initialized (passive mode, Pipeline handles scheduling)") return nil } @@ -583,12 +596,11 @@ func (m *AppInfoModule) initDataWatcher() error { // Create DataWatcher instance m.dataWatcher = NewDataWatcher(m.cacheManager, m.hydrator, m.dataSender) - // Start DataWatcher - if err := m.dataWatcher.Start(m.ctx); err != nil { + if err := m.dataWatcher.StartWithOptions(m.ctx, false); err != nil { return 
fmt.Errorf("failed to start DataWatcher: %w", err) } - glog.V(2).Info("DataWatcher initialized successfully") + glog.V(2).Info("DataWatcher initialized (passive mode)") return nil } @@ -634,7 +646,7 @@ func (m *AppInfoModule) initDataWatcherUser() error { // initDataWatcherRepo initializes the DataWatcherRepo func (m *AppInfoModule) initDataWatcherRepo() error { - glog.V(3).Info("Initializing DataWatcherRepo...") + glog.V(2).Info("Initializing DataWatcherRepo...") if m.redisClient == nil { return fmt.Errorf("redis client is required for DataWatcherRepo") @@ -647,18 +659,17 @@ func (m *AppInfoModule) initDataWatcherRepo() error { // Create DataWatcherRepo instance m.dataWatcherRepo = NewDataWatcherRepo(m.redisClient, m.cacheManager, m.dataWatcher, m.dataSender) - // Start DataWatcherRepo - if err := m.dataWatcherRepo.Start(); err != nil { + if err := m.dataWatcherRepo.StartWithOptions(false); err != nil { return fmt.Errorf("failed to start DataWatcherRepo: %w", err) } - glog.V(2).Info("DataWatcherRepo initialized successfully") + glog.V(2).Info("DataWatcherRepo initialized (passive mode)") return nil } // initStatusCorrectionChecker initializes the StatusCorrectionChecker func (m *AppInfoModule) initStatusCorrectionChecker() error { - glog.V(3).Info("Initializing StatusCorrectionChecker...") + glog.V(2).Info("Initializing StatusCorrectionChecker...") if m.cacheManager == nil { return fmt.Errorf("cache manager is required for StatusCorrectionChecker") @@ -666,12 +677,11 @@ func (m *AppInfoModule) initStatusCorrectionChecker() error { m.statusCorrectionChecker = NewStatusCorrectionChecker(m.cacheManager) - // Start StatusCorrectionChecker - if err := m.statusCorrectionChecker.Start(); err != nil { + if err := m.statusCorrectionChecker.StartWithOptions(false); err != nil { return fmt.Errorf("failed to start StatusCorrectionChecker: %w", err) } - glog.V(2).Info("StatusCorrectionChecker initialized successfully") + glog.V(2).Info("StatusCorrectionChecker initialized 
(passive mode)") return nil } @@ -750,17 +760,11 @@ func (m *AppInfoModule) correctCacheWithChartRepo() error { return fmt.Errorf("cache manager not available") } - // Add detailed lock logs for diagnosis - glog.V(3).Infof("[LOCK] m.cacheManager.mutex.TryLock() @appinfomodule:cleanup Start") - if !m.cacheManager.mutex.TryLock() { - glog.Warning("[TryLock] AppInfoModule cleanup: CacheManager write lock not available, skipping cleanup") - return nil - } - defer m.cacheManager.mutex.Unlock() - removedCount := 0 - for userID, userData := range m.cacheManager.cache.Users { + // Build the set of delisted app IDs (apps NOT in validApps) + delistedAppIDs := make(map[string]bool) + allUsersData := m.cacheManager.GetAllUsersData() // ~ correctCacheWithChartRepo + for _, userData := range allUsersData { for sourceID, sourceData := range userData.Sources { - newLatest := sourceData.AppInfoLatest[:0] for _, app := range sourceData.AppInfoLatest { var appID string if app != nil && app.RawData != nil { @@ -772,22 +776,19 @@ func (m *AppInfoModule) correctCacheWithChartRepo() error { appID = app.RawData.Name } } - if appID != "" && validApps[sourceID] != nil { - if _, ok := validApps[sourceID][appID]; ok { - newLatest = append(newLatest, app) - } else { - removedCount++ - glog.V(3).Infof("Removed app from cache: user=%s source=%s appID=%s", userID, sourceID, appID) - } - } else { - // If appID is empty, treat as invalid and remove - removedCount++ - glog.V(3).Infof("Removed app from cache (empty appID): user=%s source=%s", userID, sourceID) + if appID == "" { + continue + } + if validApps[sourceID] == nil { + delistedAppIDs[appID] = true + } else if _, ok := validApps[sourceID][appID]; !ok { + delistedAppIDs[appID] = true } } - sourceData.AppInfoLatest = newLatest } } + + removedCount := m.cacheManager.RemoveDelistedApps(delistedAppIDs) glog.V(2).Infof("Cache correction finished, removed %d apps not in chart repo", removedCount) return nil } @@ -1109,7 +1110,7 @@ func (m 
*AppInfoModule) SetAppData(userID, sourceID string, dataType AppDataType if !m.isStarted || m.cacheManager == nil { return fmt.Errorf("module is not started or cache manager is not available") } - return m.cacheManager.SetAppData(userID, sourceID, dataType, data) + return m.cacheManager.SetAppData(userID, sourceID, dataType, data, "AppInfoModule") } // GetAppData is a convenience function to get app data @@ -1271,6 +1272,7 @@ func (m *AppInfoModule) SyncUserListToCache() error { } // RefreshUserDataStructures ensures all configured users have proper data structures +// not used func (m *AppInfoModule) RefreshUserDataStructures() error { // Check isStarted without lock since it's only read if !m.isStarted { @@ -1316,7 +1318,7 @@ func (m *AppInfoModule) GetCachedUsers() []string { return []string{} } - allUsersData := m.cacheManager.GetAllUsersData() + allUsersData := m.cacheManager.GetAllUsersData() // not used users := make([]string, 0, len(allUsersData)) for userID := range allUsersData { users = append(users, userID) @@ -1355,21 +1357,14 @@ func (m *AppInfoModule) GetInvalidDataReport() map[string]interface{} { }, } - if !m.cacheManager.mutex.TryRLock() { - glog.Warning("[TryRLock] AppInfoModule: CacheManager read lock not available, skipping operation") - return map[string]interface{}{ - "error": "lock not available", - "status": "unknown", - } - } - defer m.cacheManager.mutex.RUnlock() + allUsersForReport := m.cacheManager.GetAllUsersData() // not used totalUsers := 0 totalSources := 0 totalPendingData := 0 totalInvalidData := 0 - for userID, userData := range m.cacheManager.cache.Users { + for userID, userData := range allUsersForReport { totalUsers++ userReport := map[string]interface{}{ "sources": make(map[string]interface{}), diff --git a/internal/v2/appinfo/cache.go b/internal/v2/appinfo/cache.go index daa0b30..0ffc785 100644 --- a/internal/v2/appinfo/cache.go +++ b/internal/v2/appinfo/cache.go @@ -20,25 +20,21 @@ import ( "k8s.io/client-go/tools/cache" ) 
-// HydrationNotifier interface for notifying hydrator about pending data updates -type HydrationNotifier interface { - NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) -} +type CompareAppStateMsgFunc func(appState *AppStateLatestData) // CacheManager manages the in-memory cache and Redis synchronization type CacheManager struct { - cache *CacheData - redisClient *RedisClient - userConfig *UserConfig - hydrationNotifier HydrationNotifier // Notifier for hydration updates - stateMonitor *utils.StateMonitor // State monitor for change detection - dataSender *DataSender // Direct data sender for bypassing state monitor - mutex sync.RWMutex - syncChannel chan SyncRequest - stopChannel chan bool - isRunning bool - settingsManager *settings.SettingsManager - cleanupTicker *time.Ticker // Timer for periodic cleanup of AppRenderFailed + cache *CacheData + redisClient *RedisClient + userConfig *UserConfig + stateMonitor *utils.StateMonitor // State monitor for change detection + dataSender *DataSender // Direct data sender for bypassing state monitor + mutex sync.RWMutex + syncChannel chan SyncRequest + stopChannel chan bool + isRunning bool + settingsManager *settings.SettingsManager + cleanupTicker *time.Ticker // Timer for periodic cleanup of AppRenderFailed // Lock monitoring lockStats struct { @@ -49,10 +45,6 @@ type CacheManager struct { lockCount int64 unlockCount int64 } - - // ForceSync rate limiting - forceSyncMutex sync.Mutex - lastForceSync time.Time } // startLockWatchdog starts a 1s watchdog for write lock sections and returns a stopper. 
@@ -75,87 +67,615 @@ func (cm *CacheManager) startLockWatchdog(tag string) func() { } } -// Lock acquires the cache manager's write lock -func (cm *CacheManager) Lock() { +// GetUserIDs returns a list of all user IDs in the cache +func (cm *CacheManager) GetUserIDs() []string { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + if cm.cache == nil { + return nil + } + + ids := make([]string, 0, len(cm.cache.Users)) + for id := range cm.cache.Users { + ids = append(ids, id) + } + return ids +} + +// GetOrCreateUserIDs returns all user IDs; if none exist, creates a default user first. +func (cm *CacheManager) GetOrCreateUserIDs(defaultUserID string) []string { + cm.mutex.RLock() + ids := make([]string, 0, len(cm.cache.Users)) + for id := range cm.cache.Users { + ids = append(ids, id) + } + cm.mutex.RUnlock() + + if len(ids) > 0 { + return ids + } + cm.mutex.Lock() + defer cm.mutex.Unlock() + + // Double-check after acquiring write lock + if len(cm.cache.Users) > 0 { + for id := range cm.cache.Users { + ids = append(ids, id) + } + return ids + } + + cm.cache.Users[defaultUserID] = NewUserDataEx(defaultUserID) + glog.V(3).Infof("No existing users found, created user %s as fallback", defaultUserID) + return []string{defaultUserID} } -// Unlock releases the cache manager's write lock -func (cm *CacheManager) Unlock() { - cm.mutex.Unlock() +// IsLocalSource returns true if the given source is of local type. 
+func (cm *CacheManager) IsLocalSource(userID, sourceID string) bool { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData, exists := cm.cache.Users[userID] + if !exists { + return false + } + sourceData, exists := userData.Sources[sourceID] + if !exists { + return false + } + return sourceData.Type == types.SourceDataTypeLocal } -// TryLock attempts to acquire the cache manager's write lock without blocking -// Returns true if lock acquired, false if would block -func (cm *CacheManager) TryLock() bool { - return cm.mutex.TryLock() +// SetUserHash atomically sets the hash for a user. +func (cm *CacheManager) SetUserHash(userID, hash string) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + if userData, exists := cm.cache.Users[userID]; exists { + userData.Hash = hash + } } -// RLock acquires the cache manager's read lock -func (cm *CacheManager) RLock() { - cm.mutex.RLock() +// RemoveFromPendingList removes an app from the pending list for the given user/source. +func (cm *CacheManager) RemoveFromPendingList(userID, sourceID, appID string) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + userData, ok := cm.cache.Users[userID] + if !ok { + return + } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return + } + + newSlice := make([]*types.AppInfoLatestPendingData, 0, len(sourceData.AppInfoLatestPending)) + for _, p := range sourceData.AppInfoLatestPending { + if p != nil && p.RawData != nil && + (p.RawData.ID == appID || p.RawData.AppID == appID || p.RawData.Name == appID) { + continue + } + newSlice = append(newSlice, p) + } + sourceData.AppInfoLatestPending = newSlice } -// RUnlock releases the cache manager's read lock -func (cm *CacheManager) RUnlock() { - cm.mutex.RUnlock() +// UpsertLatestAndRemovePending inserts or replaces an app in AppInfoLatest and removes +// it from AppInfoLatestPending. Returns the old version (if replaced), whether it was +// a replacement, and whether the user/source existed. 
+func (cm *CacheManager) UpsertLatestAndRemovePending( + userID, sourceID string, + latestData *types.AppInfoLatestData, + appID, appName string, +) (oldVersion string, replaced bool, ok bool) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + userData, exists := cm.cache.Users[userID] + if !exists { + return "", false, false + } + sourceData, exists := userData.Sources[sourceID] + if !exists { + return "", false, false + } + + // Find existing app by name + existingIndex := -1 + for i, app := range sourceData.AppInfoLatest { + if app == nil { + continue + } + name := "" + if app.RawData != nil { + name = app.RawData.Name + } else if app.AppInfo != nil && app.AppInfo.AppEntry != nil { + name = app.AppInfo.AppEntry.Name + } else if app.AppSimpleInfo != nil { + name = app.AppSimpleInfo.AppName + } + if name == appName { + existingIndex = i + break + } + } + + if existingIndex >= 0 { + old := sourceData.AppInfoLatest[existingIndex] + if old.AppInfo != nil && old.AppInfo.AppEntry != nil { + oldVersion = old.AppInfo.AppEntry.Version + } + sourceData.AppInfoLatest[existingIndex] = latestData + replaced = true + } else { + sourceData.AppInfoLatest = append(sourceData.AppInfoLatest, latestData) + } + + // Remove from pending + newPending := make([]*types.AppInfoLatestPendingData, 0, len(sourceData.AppInfoLatestPending)) + for _, p := range sourceData.AppInfoLatestPending { + pID := "" + if p != nil && p.RawData != nil { + pID = p.RawData.AppID + if pID == "" { + pID = p.RawData.ID + } + if pID == "" { + pID = p.RawData.Name + } + } + if pID != appID { + newPending = append(newPending, p) + } + } + sourceData.AppInfoLatestPending = newPending + + // Remove the same app from render-failed list after successful move to latest. + // Keep Pending/Failed disjoint and avoid stale failed entries. 
+ newFailed := make([]*types.AppRenderFailedData, 0, len(sourceData.AppRenderFailed)) + for _, f := range sourceData.AppRenderFailed { + if f == nil || f.RawData == nil { + newFailed = append(newFailed, f) + continue + } + matchedByID := appID != "" && (f.RawData.ID == appID || f.RawData.AppID == appID) + matchedByName := appName != "" && f.RawData.Name == appName + if matchedByID || matchedByName { + continue + } + newFailed = append(newFailed, f) + } + sourceData.AppRenderFailed = newFailed + + return oldVersion, replaced, true } -// TryRLock attempts to acquire the cache manager's read lock without blocking -// Returns true if lock acquired, false if would block -func (cm *CacheManager) TryRLock() bool { - return cm.mutex.TryRLock() +// UpdateSourceOthers updates the Others data for a given sourceID across all users. +// If a user or source doesn't exist, it is created. +func (cm *CacheManager) UpdateSourceOthers(sourceID string, others *types.Others) { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + if len(cm.cache.Users) == 0 { + systemUserID := "system" + cm.cache.Users[systemUserID] = NewUserDataEx(systemUserID) + glog.V(3).Infof("No existing users found, created system user as fallback") + } + + for userID, userData := range cm.cache.Users { + if userData.Sources == nil { + userData.Sources = make(map[string]*SourceData) + } + if userData.Sources[sourceID] == nil { + userData.Sources[sourceID] = NewSourceData() + } + userData.Sources[sourceID].Others = others + glog.V(3).Infof("Updated Others data in cache for user %s, source %s", userID, sourceID) + } } -func (cm *CacheManager) GetUserDataNoLock(userID string) *UserData { - if cm.cache == nil { - return nil +// RemoveAppFromAllSources removes an app (by name) from AppInfoLatest and +// AppInfoLatestPending across all users for the given sourceID. Returns the +// total number of users affected. 
+func (cm *CacheManager) RemoveAppFromAllSources(appName, sourceID string) int { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + affected := 0 + for _, userData := range cm.cache.Users { + sourceData, exists := userData.Sources[sourceID] + if !exists { + continue + } + + origLatest := len(sourceData.AppInfoLatest) + origPending := len(sourceData.AppInfoLatestPending) + + newLatest := make([]*types.AppInfoLatestData, 0, origLatest) + for _, app := range sourceData.AppInfoLatest { + if app == nil || app.RawData == nil || app.RawData.Name != appName { + newLatest = append(newLatest, app) + } + } + + newPending := make([]*types.AppInfoLatestPendingData, 0, origPending) + for _, app := range sourceData.AppInfoLatestPending { + if app == nil || app.RawData == nil || app.RawData.Name != appName { + newPending = append(newPending, app) + } + } + + if len(newLatest) != origLatest || len(newPending) != origPending { + sourceData.AppInfoLatest = newLatest + sourceData.AppInfoLatestPending = newPending + affected++ + } } + return affected +} - return cm.cache.Users[userID] +// RemoveDelistedApps removes apps whose ID is in the provided set from +// AppInfoLatest across all users and sources. Returns the total removal count. 
+func (cm *CacheManager) RemoveDelistedApps(delistedAppIDs map[string]bool) int { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + removedCount := 0 + for userID, userData := range cm.cache.Users { + for sourceID, sourceData := range userData.Sources { + newLatest := sourceData.AppInfoLatest[:0] + for _, app := range sourceData.AppInfoLatest { + var appID string + if app != nil && app.RawData != nil { + if app.RawData.ID != "" { + appID = app.RawData.ID + } else if app.RawData.AppID != "" { + appID = app.RawData.AppID + } else if app.RawData.Name != "" { + appID = app.RawData.Name + } + } + if delistedAppIDs[appID] { + removedCount++ + glog.V(3).Infof("Removing delisted app %s from user %s source %s", appID, userID, sourceID) + } else { + newLatest = append(newLatest, app) + } + } + sourceData.AppInfoLatest = newLatest + } + } + return removedCount } -// GetUserDataWithFallback retrieves user data with fallback mechanism -// Uses TryRLock to avoid blocking - returns nil if lock is not available immediately -func (cm *CacheManager) GetUserDataWithFallback(userID string) *UserData { - if !cm.mutex.TryRLock() { - // Lock not available immediately, return nil to avoid blocking - glog.Warningf("[TryRLock] GetUserData: Read lock not available for user %s, returning nil", userID) - return nil +// CopyPendingVersionHistory finds the pending data for the given app and copies +// its VersionHistory and AppLabels into the target ApplicationInfoEntry under write lock. +// It also overwrites the pending entry with the supplied latestData fields. 
+func (cm *CacheManager) CopyPendingVersionHistory( + userID, sourceID, appID, appName string, + latestData *types.AppInfoLatestData, +) error { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + userData, ok := cm.cache.Users[userID] + if !ok { + return fmt.Errorf("user %s not found", userID) + } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return fmt.Errorf("source %s not found for user %s", sourceID, userID) + } + + // Find the pending data + var pendingData *types.AppInfoLatestPendingData + for _, p := range sourceData.AppInfoLatestPending { + if p == nil || p.RawData == nil { + continue + } + if p.RawData.Name == appName || p.RawData.AppID == appID || p.RawData.ID == appID { + pendingData = p + break + } + } + if pendingData == nil { + return fmt.Errorf("pendingData not found for user=%s, source=%s, app=%s, appName=%s", userID, sourceID, appID, appName) } + + // Copy version history from pending to latest + if latestData.RawData != nil && pendingData.RawData != nil { + latestData.RawData.VersionHistory = pendingData.RawData.VersionHistory + if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { + latestData.AppInfo.AppEntry.VersionHistory = pendingData.RawData.VersionHistory + } + // Preserve appLabels from pendingData if latest doesn't have them + if len(pendingData.RawData.AppLabels) > 0 && len(latestData.RawData.AppLabels) == 0 { + latestData.RawData.AppLabels = pendingData.RawData.AppLabels + if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { + latestData.AppInfo.AppEntry.AppLabels = pendingData.RawData.AppLabels + } + } + } + + // Overwrite pending entry with latest data fields + pendingData.Type = latestData.Type + pendingData.Timestamp = latestData.Timestamp + pendingData.Version = latestData.Version + pendingData.RawData = latestData.RawData + pendingData.RawPackage = latestData.RawPackage + pendingData.Values = latestData.Values + pendingData.AppInfo = latestData.AppInfo + pendingData.RenderedPackage = 
latestData.RenderedPackage + pendingData.AppSimpleInfo = latestData.AppSimpleInfo + + return nil +} + +// FindPendingDataForApp finds a pending data entry by appID in the given user/source. +func (cm *CacheManager) FindPendingDataForApp(userID, sourceID, appID string) *types.AppInfoLatestPendingData { + cm.mutex.RLock() defer cm.mutex.RUnlock() - if cm.cache == nil { + userData, ok := cm.cache.Users[userID] + if !ok { return nil } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return nil + } + for _, p := range sourceData.AppInfoLatestPending { + if p != nil && p.RawData != nil && + (p.RawData.ID == appID || p.RawData.AppID == appID || p.RawData.Name == appID) { + return p + } + } + return nil +} - return cm.cache.Users[userID] +// IsAppInLatestQueue checks if an app (by ID) with a matching version exists in AppInfoLatest. +func (cm *CacheManager) IsAppInLatestQueue(userID, sourceID, appID, version string) bool { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData, ok := cm.cache.Users[userID] + if !ok { + return false + } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return false + } + + for _, ld := range sourceData.AppInfoLatest { + if ld == nil { + continue + } + if ld.RawData != nil { + if ld.RawData.ID == appID || ld.RawData.AppID == appID || ld.RawData.Name == appID { + if version != "" && ld.RawData.Version != version { + continue + } + return true + } + } + if ld.AppInfo != nil && ld.AppInfo.AppEntry != nil { + if ld.AppInfo.AppEntry.ID == appID || ld.AppInfo.AppEntry.AppID == appID || ld.AppInfo.AppEntry.Name == appID { + if version != "" && ld.AppInfo.AppEntry.Version != version { + continue + } + return true + } + } + } + return false } -// GetAllUsersDataWithFallback returns all users data with fallback mechanism -// Uses TryRLock to avoid blocking - returns empty map if lock is not available immediately -func (cm *CacheManager) GetAllUsersDataWithFallback() map[string]*UserData { - if !cm.mutex.TryRLock() { - // Lock 
not available immediately, return empty map to avoid blocking - glog.Warning("[TryRLock] GetAllUsersData: Read lock not available, returning empty map") - return make(map[string]*UserData) +// IsAppInRenderFailedList checks if an app exists in the render failed list. +// When version is provided, only same-version failures will be treated as a match. +func (cm *CacheManager) IsAppInRenderFailedList(userID, sourceID, appID, appName, version string) bool { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData, ok := cm.cache.Users[userID] + if !ok { + return false } + sourceData, ok := userData.Sources[sourceID] + if !ok { + return false + } + for _, fd := range sourceData.AppRenderFailed { + if fd == nil || fd.RawData == nil { + continue + } + + matchedByID := appID != "" && (fd.RawData.ID == appID || fd.RawData.AppID == appID || fd.RawData.Name == appID) + matchedByName := appName != "" && fd.RawData.Name == appName + if !matchedByID && !matchedByName { + continue + } + + // If incoming version is known, only block when failed record has the same known version. + if version != "" { + failedVersion := fd.Version + if failedVersion == "" { + failedVersion = fd.RawData.Version + } + if failedVersion == "" || failedVersion != version { + continue + } + } + return true + } + return false +} + +// HasSourceData returns true if any user has non-empty AppInfoLatest or +// AppInfoLatestPending data for the given sourceID. 
+func (cm *CacheManager) HasSourceData(sourceID string) bool { + cm.mutex.RLock() defer cm.mutex.RUnlock() - if cm.cache == nil { - return make(map[string]*UserData) + for _, userData := range cm.cache.Users { + if sourceData, exists := userData.Sources[sourceID]; exists { + if len(sourceData.AppInfoLatestPending) > 0 || len(sourceData.AppInfoLatest) > 0 { + return true + } + } } + return false +} - result := make(map[string]*UserData) +// IsAppInstalled returns true if any user has the named app in a non-uninstalled +// state in AppStateLatest for the given sourceID. +func (cm *CacheManager) IsAppInstalled(sourceID, appName string) bool { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + for _, userData := range cm.cache.Users { + if sourceData, ok := userData.Sources[sourceID]; ok { + for _, appState := range sourceData.AppStateLatest { + if appState != nil && appState.Status.Name == appName && appState.Status.State != "uninstalled" { + return true + } + } + } + } + return false +} + +// GetSourceOthersHash returns the Others.Hash stored for the given sourceID +// in the first user that has a valid hash. +func (cm *CacheManager) GetSourceOthersHash(sourceID string) string { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + for _, userData := range cm.cache.Users { + if sourceData, exists := userData.Sources[sourceID]; exists { + if sourceData.Others != nil && sourceData.Others.Hash != "" { + return sourceData.Others.Hash + } + } + } + return "" +} + +// ListActiveUsers returns information about all active (existing) users. 
+func (cm *CacheManager) ListActiveUsers() []map[string]string { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + var usersInfo []map[string]string + for _, v := range cm.cache.Users { + if v.UserInfo != nil && v.UserInfo.Exists { + ui := map[string]string{ + "id": v.UserInfo.Id, + "name": v.UserInfo.Name, + "role": v.UserInfo.Role, + "status": v.UserInfo.Status, + } + usersInfo = append(usersInfo, ui) + } + } + return usersInfo +} + +// CollectAllPendingItems returns all non-nil pending items across all users and sources. +type PendingItem struct { + UserID string + SourceID string + Pending *types.AppInfoLatestPendingData +} + +func (cm *CacheManager) CollectAllPendingItems() []PendingItem { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + var items []PendingItem for userID, userData := range cm.cache.Users { - result[userID] = userData + for sourceID, sourceData := range userData.Sources { + for _, pd := range sourceData.AppInfoLatestPending { + if pd != nil { + items = append(items, PendingItem{userID, sourceID, pd}) + } + } + } } - return result + return items } -// GetCache returns the underlying cache data -func (cm *CacheManager) GetCache() *CacheData { - return cm.cache +// RestoreRetryableFailedToPending moves up to `limit` items from AppRenderFailed +// back to AppInfoLatestPending (FIFO order) so they can be retried by the hydrator. +// Items are removed from AppRenderFailed to avoid duplicates. +// Returns the number of items restored. 
+func (cm *CacheManager) RestoreRetryableFailedToPending(limit int) int { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + restored := 0 + for _, userData := range cm.cache.Users { + if restored >= limit { + break + } + for _, sourceData := range userData.Sources { + if restored >= limit { + break + } + i := 0 + for i < len(sourceData.AppRenderFailed) && restored < limit { + fd := sourceData.AppRenderFailed[i] + if fd == nil || fd.RawData == nil { + i++ + continue + } + sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, &types.AppInfoLatestPendingData{ + Type: types.AppInfoLatestPending, + Timestamp: fd.Timestamp, + Version: fd.Version, + RawData: fd.RawData, + RawPackage: fd.RawPackage, + Values: fd.Values, + AppInfo: fd.AppInfo, + RenderedPackage: fd.RenderedPackage, + }) + sourceData.AppRenderFailed = append(sourceData.AppRenderFailed[:i], sourceData.AppRenderFailed[i+1:]...) + restored++ + } + } + } + if restored > 0 { + glog.V(2).Infof("RestoreRetryableFailedToPending: restored %d failed apps to pending queue", restored) + } + return restored +} + +// SnapshotSourcePending returns shallow copies of the pending and latest slices +// for the given user/source, safe for iteration outside the lock. 
+func (cm *CacheManager) SnapshotSourcePending(userID, sourceID string) ( + pending []*types.AppInfoLatestPendingData, + latest []*types.AppInfoLatestData, +) { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData, exists := cm.cache.Users[userID] + if !exists { + return nil, nil + } + sourceData, exists := userData.Sources[sourceID] + if !exists { + return nil, nil + } + + pending = make([]*types.AppInfoLatestPendingData, len(sourceData.AppInfoLatestPending)) + copy(pending, sourceData.AppInfoLatestPending) + latest = make([]*types.AppInfoLatestData, len(sourceData.AppInfoLatest)) + copy(latest, sourceData.AppInfoLatest) + return pending, latest } // SyncRequest represents a request to sync data to Redis @@ -203,7 +723,7 @@ func NewCacheManager(redisClient *RedisClient, userConfig *UserConfig) *CacheMan // Start initializes the cache by loading data from Redis and starts the sync worker func (cm *CacheManager) Start() error { - glog.V(3).Infof("Starting cache manager") + glog.V(2).Infof("Starting cache manager") // Load cache data from Redis if ClearCache is false if !cm.userConfig.ClearCache { @@ -282,7 +802,7 @@ func (cm *CacheManager) Start() error { // Start periodic cleanup of AppRenderFailed data (every 5 minutes) cm.cleanupTicker = time.NewTicker(5 * time.Minute) - go cm.cleanupWorker() + go cm.cleanupWorker() // + glog.V(3).Infof("Cache manager started successfully") return nil @@ -355,11 +875,7 @@ func (cm *CacheManager) processSyncRequest(req SyncRequest) { // GetUserData retrieves user data from cache func (cm *CacheManager) GetUserData(userID string) *UserData { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @184 Start") - if !cm.mutex.TryRLock() { - glog.Warningf("[TryRLock] GetUserData: Read lock not available for user %s, returning nil", userID) - return nil - } + cm.mutex.RLock() defer cm.mutex.RUnlock() return cm.cache.Users[userID] @@ -372,11 +888,7 @@ func (cm *CacheManager) getUserData(userID string) *UserData { // GetSourceData retrieves 
source data from cache func (cm *CacheManager) GetSourceData(userID, sourceID string) *SourceData { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @197 Start") - if !cm.mutex.TryRLock() { - glog.Warningf("[TryRLock] GetSourceData: Read lock not available for user %s, source %s, returning nil", userID, sourceID) - return nil - } + cm.mutex.RLock() defer cm.mutex.RUnlock() if userData, exists := cm.cache.Users[userID]; exists { @@ -388,9 +900,7 @@ func (cm *CacheManager) GetSourceData(userID, sourceID string) *SourceData { // GetAppVersionFromState retrieves app version from AppStateLatest in the specified source // Returns version and found flag func (cm *CacheManager) GetAppVersionFromState(userID, sourceID, appName string) (version string, found bool) { - if !cm.mutex.TryRLock() { - return "", false - } + cm.mutex.RLock() defer cm.mutex.RUnlock() userData := cm.cache.Users[userID] @@ -422,24 +932,6 @@ func (cm *CacheManager) getSourceData(userID, sourceID string) *SourceData { return nil } -// SetHydrationNotifier sets the hydration notifier for real-time updates -func (cm *CacheManager) setHydrationNotifierInternal(notifier HydrationNotifier) { - glog.V(4).Infof("[LOCK] cm.mutex.Lock() @216 Start") - lockStart := time.Now() - cm.mutex.Lock() - glog.V(4).Infof("[LOCK] cm.mutex.Lock() @216 Success (wait=%v)", time.Since(lockStart)) - defer cm.mutex.Unlock() - cm.hydrationNotifier = notifier - glog.V(4).Infof("Hydration notifier set successfully") -} - -// SetHydrationNotifier sets the hydration notifier for real-time updates -func (cm *CacheManager) SetHydrationNotifier(notifier HydrationNotifier) { - go func() { - cm.setHydrationNotifierInternal(notifier) - }() -} - // updateAppStateLatest updates or adds a single app state based on name matching func (cm *CacheManager) updateAppStateLatest(userID, sourceID string, sourceData *SourceData, newAppState *types.AppStateLatestData) { if newAppState == nil { @@ -525,7 +1017,7 @@ func (cm *CacheManager) 
updateAppStateLatest(userID, sourceID string, sourceData Source: sourceID, } - if err := cm.dataSender.SendAppInfoUpdate(update); err != nil { + if err := cm.dataSender.SendAppInfoUpdate(update, "cache"); err != nil { glog.Errorf("Force push state update for app %s failed: %v", newAppState.Status.Name, err) } else { glog.V(3).Infof("Force pushed state update for app %s due to EntranceStatuses fallback (only metadata changed)", newAppState.Status.Name) @@ -554,13 +1046,8 @@ func (cm *CacheManager) updateAppStateLatest(userID, sourceID string, sourceData } func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType AppDataType, data map[string]interface{}) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @269 Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] setAppDataInternal: Write lock not available for user %s, source %s, type %v, skipping", userID, sourceID, dataType) - return fmt.Errorf("write lock not available") - } + cm.mutex.Lock() cm.updateLockStats("lock") - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @269 Success") // Watchdog: warn if write lock is held >1s watchdogFired := make(chan struct{}, 1) timer := time.AfterFunc(1*time.Second, func() { @@ -897,18 +1384,98 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App appData.Timestamp = time.Now().Unix() sourceData.AppInfoLatest = append(sourceData.AppInfoLatest, appData) case AppInfoLatestPending: - // Clear existing AppInfoLatestPending list before adding new data - // This ensures we don't accumulate old data when hash doesn't match + // Build version map from AppInfoLatest to skip apps with unchanged versions + latestVersionMap := make(map[string]string) + for _, latestApp := range sourceData.AppInfoLatest { + if latestApp == nil || latestApp.RawData == nil { + continue + } + v := latestApp.RawData.Version + if v == "" { + continue + } + if latestApp.RawData.Name != "" { + latestVersionMap[latestApp.RawData.Name] = v + } + if 
latestApp.RawData.AppID != "" { + latestVersionMap[latestApp.RawData.AppID] = v + } + if latestApp.RawData.ID != "" { + latestVersionMap[latestApp.RawData.ID] = v + } + } + // Build version map from AppRenderFailed to avoid re-adding the same failed app + // into Pending on every sync cycle. + failedVersionMap := make(map[string]string) + for _, failedApp := range sourceData.AppRenderFailed { + if failedApp == nil || failedApp.RawData == nil { + continue + } + v := failedApp.Version + if v == "" { + v = failedApp.RawData.Version + } + if failedApp.RawData.Name != "" { + failedVersionMap[failedApp.RawData.Name] = v + } + if failedApp.RawData.AppID != "" { + failedVersionMap[failedApp.RawData.AppID] = v + } + if failedApp.RawData.ID != "" { + failedVersionMap[failedApp.RawData.ID] = v + } + } + originalCount := len(sourceData.AppInfoLatestPending) - sourceData.AppInfoLatestPending = sourceData.AppInfoLatestPending[:0] // Clear the slice + sourceData.AppInfoLatestPending = sourceData.AppInfoLatestPending[:0] glog.V(3).Infof("Cleared %d existing AppInfoLatestPending entries for user=%s, source=%s", originalCount, userID, sourceID) - // Check if this is a complete market data structure + shouldSkipApp := func(appData *AppInfoLatestPendingData) bool { + if appData == nil || appData.RawData == nil { + return false + } + incomingVersion := appData.RawData.Version + if incomingVersion != "" { + if name := appData.RawData.Name; name != "" { + if existing, ok := latestVersionMap[name]; ok && existing == incomingVersion { + return true + } + } + if id := appData.RawData.AppID; id != "" { + if existing, ok := latestVersionMap[id]; ok && existing == incomingVersion { + return true + } + } + if id := appData.RawData.ID; id != "" { + if existing, ok := latestVersionMap[id]; ok && existing == incomingVersion { + return true + } + } + } + + // Skip app only when the same app-version is already in render-failed. + // Unknown versions should not block upgrades/new retries. 
+ matchFailed := func(key string) bool { + if key == "" || incomingVersion == "" { + return false + } + failedVersion, ok := failedVersionMap[key] + if !ok || failedVersion == "" { + return false + } + return failedVersion == incomingVersion + } + if matchFailed(appData.RawData.Name) || matchFailed(appData.RawData.AppID) || matchFailed(appData.RawData.ID) { + return true + } + return false + } + + skippedCount := 0 + if appsData, hasApps := data["apps"].(map[string]interface{}); hasApps { - // This is complete market data, extract individual apps glog.V(3).Infof("Processing complete market data with %d apps for user=%s, source=%s", len(appsData), userID, sourceID) - // Also store the "others" data (hash, version, topics, etc.) others := &types.Others{} if version, ok := data["version"].(string); ok { others.Version = version @@ -916,8 +1483,6 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App if hash, ok := data["hash"].(string); ok { others.Hash = hash } - - // Extract topics, recommends, pages if present if topics, ok := data["topics"].(map[string]interface{}); ok { for _, topicData := range topics { if topicMap, ok := topicData.(map[string]interface{}); ok { @@ -925,7 +1490,6 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App if name, ok := topicMap["name"].(string); ok { topic.Name = name } - // Extract topic data if present if data, ok := topicMap["data"].(map[string]interface{}); ok { topic.Data = make(map[string]*types.TopicData) for lang, topicDataInterface := range data { @@ -942,82 +1506,89 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App } } } - - // Store others data in source sourceData.Others = others - // Process each individual app for appID, appDataInterface := range appsData { if appDataMap, ok := appDataInterface.(map[string]interface{}); ok { - glog.V(3).Infof("DEBUG: CALL POINT 1 - Processing app %s for user=%s, source=%s", appID, userID, 
sourceID) - glog.V(3).Infof("DEBUG: CALL POINT 1 - App data before calling NewAppInfoLatestPendingDataFromLegacyData: %+v", appDataMap) appData := NewAppInfoLatestPendingDataFromLegacyData(appDataMap) - if appData != nil { - appData.Timestamp = time.Now().Unix() - sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) - glog.V(2).Infof("Added app %s for user=%s, source=%s", appID, userID, sourceID) - } else { - glog.Warningf("Failed to create app data for app %s (user=%s, source=%s)", appID, userID, sourceID) + if appData == nil { + continue } + if shouldSkipApp(appData) { + skippedCount++ + continue + } + appData.Timestamp = time.Now().Unix() + sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) + glog.V(3).Infof("Added app %s for user=%s, source=%s", appID, userID, sourceID) } } - - glog.V(3).Infof("Successfully processed %d apps from market data for user=%s, source=%s", len(sourceData.AppInfoLatestPending), userID, sourceID) } else { - // This might be market data with nested apps structure, try to extract apps - glog.V(3).Infof("DEBUG: CALL POINT 2 - Processing potential market data for user=%s, source=%s", userID, sourceID) - glog.V(3).Infof("DEBUG: CALL POINT 2 - Data before processing: %+v", data) - - // Check if this is market data with nested structure if dataSection, hasData := data["data"].(map[string]interface{}); hasData { if appsData, hasApps := dataSection["apps"].(map[string]interface{}); hasApps { - // This is market data with apps - process each app individually - glog.V(3).Infof("DEBUG: CALL POINT 2 - Found nested apps structure with %d apps", len(appsData)) for appID, appDataInterface := range appsData { if appDataMap, ok := appDataInterface.(map[string]interface{}); ok { - glog.V(3).Infof("DEBUG: CALL POINT 2 - Processing app %s", appID) appData := NewAppInfoLatestPendingDataFromLegacyData(appDataMap) - if appData != nil { - appData.Timestamp = time.Now().Unix() - 
sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) - glog.V(3).Infof("Added app %s for user=%s, source=%s", appID, userID, sourceID) - } else { - glog.Warningf("Failed to create app data for app %s (user=%s, source=%s)", appID, userID, sourceID) + if appData == nil { + continue + } + if shouldSkipApp(appData) { + skippedCount++ + continue } + appData.Timestamp = time.Now().Unix() + sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) + glog.V(3).Infof("Added app %s for user=%s, source=%s", appID, userID, sourceID) } } - glog.V(2).Infof("Successfully processed %d apps from nested market data for user=%s, source=%s", len(sourceData.AppInfoLatestPending), userID, sourceID) } else { glog.Warningf("Market data found but no apps section for user=%s, source=%s", userID, sourceID) } } else { - // This might be actual single app data, try to process directly - glog.V(3).Infof("DEBUG: CALL POINT 2 - Trying as single app data for user=%s, source=%s", userID, sourceID) appData := NewAppInfoLatestPendingDataFromLegacyData(data) if appData == nil { - glog.Warningf("Failed to create AppInfoLatestPendingData from data for user=%s, source=%s - not recognized as app data or market data", userID, sourceID) + glog.Warningf("Failed to create AppInfoLatestPendingData for user=%s, source=%s", userID, sourceID) return fmt.Errorf("invalid app data: missing required identifiers (id, name, or appID)") } - - appData.Timestamp = time.Now().Unix() - sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) - glog.V(2).Infof("Successfully processed single app data for user=%s, source=%s", userID, sourceID) + if !shouldSkipApp(appData) { + appData.Timestamp = time.Now().Unix() + sourceData.AppInfoLatestPending = append(sourceData.AppInfoLatestPending, appData) + } else { + skippedCount++ + } } } - glog.V(2).Infof("Updated AppInfoLatestPending list with %d new entries for user=%s, source=%s", - 
len(sourceData.AppInfoLatestPending), userID, sourceID) + glog.V(2).Infof("Updated AppInfoLatestPending: %d new, %d skipped (unchanged version or in render-failed) for user=%s, source=%s", + len(sourceData.AppInfoLatestPending), skippedCount, userID, sourceID) - // Notify hydrator about pending data update for immediate task creation - if cm.hydrationNotifier != nil && len(sourceData.AppInfoLatestPending) > 0 { - glog.V(3).Infof("Notifying hydrator about pending data update for user=%s, source=%s", userID, sourceID) - go cm.hydrationNotifier.NotifyPendingDataUpdate(userID, sourceID, data) - } case types.AppRenderFailed: // Handle render failed data - this is typically set by the hydrator when tasks fail if failedAppData, hasFailedApp := data["failed_app"].(*types.AppRenderFailedData); hasFailedApp { - sourceData.AppRenderFailed = append(sourceData.AppRenderFailed, failedAppData) - glog.V(3).Infof("Added render failed app for user=%s, source=%s, app=%s, reason=%s", + if failedAppData == nil || failedAppData.RawData == nil { + glog.Errorf("Invalid render failed data: nil failed app or raw data for user=%s, source=%s", userID, sourceID) + return fmt.Errorf("invalid render failed data: nil failed app or raw data") + } + + replaced := false + for i, existing := range sourceData.AppRenderFailed { + if existing == nil || existing.RawData == nil { + continue + } + matchedByID := (failedAppData.RawData.ID != "" && existing.RawData.ID == failedAppData.RawData.ID) || + (failedAppData.RawData.AppID != "" && existing.RawData.AppID == failedAppData.RawData.AppID) + matchedByName := failedAppData.RawData.Name != "" && existing.RawData.Name == failedAppData.RawData.Name + if matchedByID || matchedByName { + sourceData.AppRenderFailed[i] = failedAppData + replaced = true + break + } + } + + if !replaced { + sourceData.AppRenderFailed = append(sourceData.AppRenderFailed, failedAppData) + } + glog.V(3).Infof("Upserted render failed app for user=%s, source=%s, app=%s, reason=%s", 
userID, sourceID, failedAppData.RawData.AppID, failedAppData.FailureReason) } else { glog.Errorf("Invalid render failed data format for user=%s, source=%s", userID, sourceID) @@ -1036,8 +1607,9 @@ func (cm *CacheManager) setAppDataInternal(userID, sourceID string, dataType App return nil } -func (cm *CacheManager) SetAppData(userID, sourceID string, dataType AppDataType, data map[string]interface{}) error { +func (cm *CacheManager) SetAppData(userID, sourceID string, dataType AppDataType, data map[string]interface{}, tracing string) error { + glog.Infof("[SetAppData] user: %s, source: %s, dataType: %s, trace: %s", userID, sourceID, dataType, tracing) // go func() { if err := cm.setAppDataInternal(userID, sourceID, dataType, data); err != nil { glog.Errorf("Failed to set app data in goroutine: %v", err) @@ -1048,13 +1620,8 @@ func (cm *CacheManager) SetAppData(userID, sourceID string, dataType AppDataType } func (cm *CacheManager) setLocalAppDataInternal(userID, sourceID string, dataType AppDataType, data types.AppInfoLatestData) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @SetLocalAppData Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] setLocalAppDataInternal: Write lock not available for user %s, source %s, type %v, skipping", userID, sourceID, dataType) - return fmt.Errorf("write lock not available") - } + cm.mutex.Lock() cm.updateLockStats("lock") - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @SetLocalAppData Success") _wd := cm.startLockWatchdog("@SetLocalAppData") defer func() { @@ -1132,11 +1699,7 @@ func (cm *CacheManager) SetLocalAppData(userID, sourceID string, dataType AppDat // GetAppData retrieves app data from cache using single global lock func (cm *CacheManager) GetAppData(userID, sourceID string, dataType AppDataType) interface{} { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @543 Start") - if !cm.mutex.TryRLock() { - glog.Warningf("[TryRLock] GetAppData: Read lock not available for user %s, source %s, type %v, returning nil", 
userID, sourceID, dataType) - return nil - } + cm.mutex.RLock() defer cm.mutex.RUnlock() if userData, exists := cm.cache.Users[userID]; exists { @@ -1161,12 +1724,7 @@ func (cm *CacheManager) GetAppData(userID, sourceID string, dataType AppDataType // RemoveUserData removes user data from cache and Redis func (cm *CacheManager) removeUserDataInternal(userID string) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @568 Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] removeUserDataInternal: Write lock not available for user %s, skipping", userID) - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @568 Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@568:removeUser") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1195,12 +1753,7 @@ func (cm *CacheManager) RemoveUserData(userID string) error { // AddUser adds a new user to the cache func (cm *CacheManager) addUserInternal(userID string) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @AddUser Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] addUserInternal: Write lock not available for user %s, skipping", userID) - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @AddUser Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@AddUser") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1251,11 +1804,7 @@ func (cm *CacheManager) AddUser(userID string) error { // GetCacheStats returns cache statistics using single global lock func (cm *CacheManager) GetCacheStats() map[string]interface{} { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @586 Start") - if !cm.mutex.TryRLock() { - glog.Warning("[TryRLock] GetCacheStats: Read lock not available, returning empty stats") - return map[string]interface{}{"error": "lock not available"} - } + cm.mutex.RLock() defer cm.mutex.RUnlock() stats := make(map[string]interface{}) @@ -1285,29 +1834,13 @@ func (cm *CacheManager) requestSync(req SyncRequest) 
{ } // ForceSync forces immediate synchronization of all data to Redis -// Rate limited to once per minute to prevent excessive Redis operations func (cm *CacheManager) ForceSync() error { - // Check rate limiting first - cm.forceSyncMutex.Lock() - now := time.Now() - if !cm.lastForceSync.IsZero() && now.Sub(cm.lastForceSync) < time.Minute { - cm.forceSyncMutex.Unlock() - glog.V(4).Infof("ForceSync: Rate limited, last sync was %v ago", now.Sub(cm.lastForceSync)) - return fmt.Errorf("force sync rate limited, please wait %v", time.Minute-now.Sub(cm.lastForceSync)) - } - cm.lastForceSync = now - cm.forceSyncMutex.Unlock() - glog.V(2).Infof("Force syncing all cache data to Redis") // 1. Quickly obtain a data snapshot to minimize lock holding time var userDataSnapshot map[string]*UserData func() { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @617 Start") - if !cm.mutex.TryRLock() { - glog.Warning("[TryRLock] ForceSync: Read lock not available, returning error") - return - } + cm.mutex.RLock() defer func() { cm.mutex.RUnlock() glog.V(4).Infof("[LOCK] cm.mutex.RUnlock() @617 End") @@ -1354,39 +1887,9 @@ func (cm *CacheManager) ForceSync() error { } } -// CanForceSync checks if ForceSync can be executed (not rate limited) -func (cm *CacheManager) CanForceSync() bool { - cm.forceSyncMutex.Lock() - defer cm.forceSyncMutex.Unlock() - - now := time.Now() - return cm.lastForceSync.IsZero() || now.Sub(cm.lastForceSync) >= time.Minute -} - -// GetForceSyncCooldown returns the remaining cooldown time for ForceSync -func (cm *CacheManager) GetForceSyncCooldown() time.Duration { - cm.forceSyncMutex.Lock() - defer cm.forceSyncMutex.Unlock() - - now := time.Now() - if cm.lastForceSync.IsZero() { - return 0 - } - - elapsed := now.Sub(cm.lastForceSync) - if elapsed >= time.Minute { - return 0 - } - - return time.Minute - elapsed -} - // GetAllUsersData returns all users data from cache using single global lock func (cm *CacheManager) GetAllUsersData() map[string]*UserData { - if 
!cm.mutex.TryRLock() { - glog.Warning("[TryRLock] GetAllUsersData: Read lock not available, returning empty map") - return make(map[string]*UserData) - } + cm.mutex.RLock() defer cm.mutex.RUnlock() if cm.cache == nil { @@ -1409,15 +1912,8 @@ func (cm *CacheManager) GetAllUsersData() map[string]*UserData { // HasUserStateDataForSource checks if any user has non-empty state data for a specific source func (cm *CacheManager) HasUserStateDataForSource(sourceID string) bool { - glog.V(4).Infof("[LOCK] cm.mutex.TryRLock() @HasUserStateDataForSource Start") - if !cm.mutex.TryRLock() { - glog.Warningf("[TryRLock] HasUserStateDataForSource: Read lock not available for source %s, returning false", sourceID) - return false - } - defer func() { - cm.mutex.RUnlock() - glog.V(4).Infof("[LOCK] cm.mutex.RUnlock() @HasUserStateDataForSource End") - }() + cm.mutex.RLock() + defer cm.mutex.RUnlock() if cm.cache == nil { return false @@ -1440,12 +1936,7 @@ func (cm *CacheManager) HasUserStateDataForSource(sourceID string) bool { // UpdateUserConfig updates the user configuration and ensures all users have data structures func (cm *CacheManager) updateUserConfigInternal(newUserConfig *UserConfig) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @660 Start") - if !cm.mutex.TryLock() { - glog.Warning("[TryLock] updateUserConfigInternal: Write lock not available, skipping") - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @660 Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@660:updateUserConfig") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1515,12 +2006,7 @@ func (cm *CacheManager) UpdateUserConfig(newUserConfig *UserConfig) error { // SyncUserListToCache ensures all users from current userConfig have initialized data structures func (cm *CacheManager) syncUserListToCacheInternal() error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @718 Start") - if !cm.mutex.TryLock() { - glog.Warning("[TryLock] 
syncUserListToCacheInternal: Write lock not available, skipping") - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @718 Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@718:syncUserList") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1568,11 +2054,7 @@ func (cm *CacheManager) SyncUserListToCache() error { // CleanupInvalidPendingData removes invalid pending data entries that lack required identifiers func (cm *CacheManager) cleanupInvalidPendingDataInternal() int { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @751 Start") - if !cm.mutex.TryLock() { - glog.Warning("[TryLock] CleanupInvalidPendingData: Write lock not available, skipping cleanup") - return 0 - } + cm.mutex.Lock() _wd := cm.startLockWatchdog("@751:cleanupInvalidPending") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1782,14 +2264,10 @@ func (cm *CacheManager) enhanceAppStateDataWithUrls(data map[string]interface{}, return enhancedData } -// GetLockStats returns current lock statistics for monitoring -func (cm *CacheManager) GetLockStats() map[string]interface{} { - glog.V(4).Infof("[LOCK] cm.lockStats.Lock() GetLockStats Start") +// getLockStats returns current lock statistics for internal monitoring +func (cm *CacheManager) getLockStats() map[string]interface{} { cm.lockStats.Lock() - defer func() { - cm.lockStats.Unlock() - glog.V(4).Infof("[LOCK] cm.lockStats.Unlock() GetLockStats End") - }() + defer cm.lockStats.Unlock() stats := make(map[string]interface{}) stats["last_lock_time"] = cm.lockStats.lastLockTime @@ -1798,7 +2276,6 @@ func (cm *CacheManager) GetLockStats() map[string]interface{} { stats["lock_count"] = cm.lockStats.lockCount stats["unlock_count"] = cm.lockStats.unlockCount - // Check for potential lock issues if cm.lockStats.lockCount > cm.lockStats.unlockCount { stats["lock_imbalance"] = cm.lockStats.lockCount - cm.lockStats.unlockCount stats["potential_deadlock"] = true @@ -1807,7 +2284,6 @@ func (cm *CacheManager) 
GetLockStats() map[string]interface{} { stats["potential_deadlock"] = false } - // Check if lock has been held for too long if !cm.lockStats.lastLockTime.IsZero() && cm.lockStats.lockDuration > 30*time.Second { stats["long_lock_duration"] = true stats["current_lock_duration"] = time.Since(cm.lockStats.lastLockTime) @@ -1818,15 +2294,12 @@ func (cm *CacheManager) GetLockStats() map[string]interface{} { return stats } -// DumpLockInfo prints lock stats and all goroutine stacks for diagnosing lock holders -func (cm *CacheManager) DumpLockInfo(reason string) { +// dumpLockInfo prints lock stats and all goroutine stacks for diagnosing lock holders +func (cm *CacheManager) dumpLockInfo(reason string) { glog.V(4).Infof("LOCK DIAG: reason=%s", reason) - // Print current lock stats snapshot - stats := cm.GetLockStats() + stats := cm.getLockStats() glog.V(4).Infof("LOCK DIAG: stats=%v", stats) - // Dump all goroutine stacks to identify who might be holding the lock - // Note: This is safe but can be large; only used on timeouts. 
buf := make([]byte, 1<<20) n := runtime.Stack(buf, true) glog.V(4).Infof("LOCK DIAG: goroutine dump (%d bytes)\n%s", n, string(buf[:n])) @@ -1858,11 +2331,7 @@ func (cm *CacheManager) updateLockStats(lockType string) { // RemoveAppStateData removes a specific app from AppStateLatest for a user and source func (cm *CacheManager) removeAppStateDataInternal(userID, sourceID, appName string) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @RemoveAppStateData Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] RemoveAppStateData: Write lock not available for user %s, source %s, app %s, skipping", userID, sourceID, appName) - return fmt.Errorf("write lock not available") - } + cm.mutex.Lock() _wd := cm.startLockWatchdog("@RemoveAppStateData") defer func() { cm.mutex.Unlock(); _wd() }() @@ -1915,12 +2384,7 @@ func (cm *CacheManager) RemoveAppStateData(userID, sourceID, appName string) err // RemoveAppInfoLatestData removes a specific app from AppInfoLatest for a user and source func (cm *CacheManager) removeAppInfoLatestDataInternal(userID, sourceID, appName string) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @RemoveAppInfoLatestData Start") - if !cm.mutex.TryLock() { - glog.Warningf("[TryLock] removeAppInfoLatestDataInternal: Write lock not available for user %s, source %s, app %s, skipping", userID, sourceID, appName) - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @RemoveAppInfoLatestData Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@RemoveAppInfoLatestData") defer func() { cm.mutex.Unlock(); _wd() }() @@ -2005,13 +2469,9 @@ func (cm *CacheManager) GetSettingsManager() *settings.SettingsManager { } // SyncMarketSourcesToCache synchronizes market sources to all users in cache +// todo remove watch dog func (cm *CacheManager) syncMarketSourcesToCacheInternal(sources []*settings.MarketSource) error { - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @SyncMarketSourcesToCache Start") - if 
!cm.mutex.TryLock() { - glog.Warningf("[TryLock] syncMarketSourcesToCacheInternal: Write lock not available, skipping") - return fmt.Errorf("write lock not available") - } - glog.V(4).Infof("[LOCK] cm.mutex.TryLock() @SyncMarketSourcesToCache Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@SyncMarketSourcesToCache") defer func() { cm.mutex.Unlock() @@ -2084,12 +2544,7 @@ func (cm *CacheManager) SyncMarketSourcesToCache(sources []*settings.MarketSourc } func (cm *CacheManager) resynceUserInternal() error { - glog.V(4).Info("[LOCK] cm.mutex.TryLock() @resynceUserInternal Start") - if !cm.mutex.TryLock() { - glog.Warning("[TryLock] resynceUserInternal: Write lock not available, skipping") - return fmt.Errorf("write lock not available") - } - glog.V(4).Info("[LOCK] cm.mutex.TryLock() @resynceUserInternal Success") + cm.mutex.Lock() _wd := cm.startLockWatchdog("@resynceUserInternal") defer func() { cm.mutex.Unlock(); _wd() }() @@ -2147,107 +2602,53 @@ func (cm *CacheManager) cleanupWorker() { // ClearAppRenderFailedData clears all AppRenderFailed data for all users and sources func (cm *CacheManager) ClearAppRenderFailedData() { - glog.V(3).Info("INFO: Starting periodic cleanup of AppRenderFailed data") - - start := time.Now() - // 1) Short lock phase: collect keys to be cleaned and count the number - type target struct{ userID, sourceID string } - targets := make([]target, 0, 128) - counts := make(map[target]int) - - glog.V(3).Info("INFO: [Cleanup] Attempting to acquire read lock for scan phase") - if !cm.mutex.TryRLock() { - glog.Warning("[TryRLock] INFO: [Cleanup] Read lock not available for scan phase, skipping cleanup") - return - } - scanLockAcquiredAt := time.Now() - glog.V(3).Info("INFO: [Cleanup] Read lock acquired (scan). 
Hold minimal time") + glog.Info("INFO: [Cleanup] Starting periodic cleanup of AppRenderFailed data") + cm.mutex.RLock() if cm.cache == nil { cm.mutex.RUnlock() - glog.V(4).Info("WARN: Cache is nil, skipping AppRenderFailed cleanup") return } + type target struct{ userID, sourceID string } + targets := make([]target, 0, 128) + for userID, userData := range cm.cache.Users { for sourceID, sourceData := range userData.Sources { - if n := len(sourceData.AppRenderFailed); n > 0 { - t := target{userID: userID, sourceID: sourceID} - targets = append(targets, t) - counts[t] = n + if len(sourceData.AppRenderFailed) > 0 { + targets = append(targets, target{userID: userID, sourceID: sourceID}) } } } - // 2) Release read lock after scan cm.mutex.RUnlock() - glog.V(3).Infof("INFO: [Cleanup] Released read lock after scan (held %v), targets=%d", time.Since(scanLockAcquiredAt), len(targets)) - - // 3) Processing phase: Use batch processing to avoid lock contention - totalCleared := 0 if len(targets) == 0 { - glog.V(3).Infof("DEBUG: No AppRenderFailed entries found during periodic cleanup (took %v)", time.Since(start)) return } - // Use single write lock to batch process all targets to avoid lock contention - glog.V(3).Infof("INFO: [Cleanup] Processing %d targets in batch mode", len(targets)) - - // Use short timeout to quickly acquire write lock to avoid writer starvation - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Millisecond) - defer cancel() - - // Use channel to implement non-blocking lock acquisition - lockAcquired := make(chan struct{}, 1) - lockFailed := make(chan struct{}, 1) - - // Start goroutine to attempt lock acquisition (only for very short time, give up immediately if not acquired) - go func() { - done := make(chan struct{}, 1) - go func() { - if cm.mutex.TryLock() { - done <- struct{}{} - } - }() - select { - case <-done: - // Successfully acquired lock - lockAcquired <- struct{}{} - case <-ctx.Done(): - // Failed to acquire write lock quickly, 
give up immediately to avoid reader starvation - lockFailed <- struct{}{} - } - }() + start := time.Now() + cm.mutex.Lock() + defer cm.mutex.Unlock() - // Wait for lock acquisition result - select { - case <-lockAcquired: - // Successfully acquired lock, batch process all targets - defer cm.mutex.Unlock() - - for _, t := range targets { - if userData, ok := cm.cache.Users[t.userID]; ok { - if sourceData, ok2 := userData.Sources[t.sourceID]; ok2 { - originalCount := len(sourceData.AppRenderFailed) - if originalCount > 0 { - sourceData.AppRenderFailed = make([]*types.AppRenderFailedData, 0) - totalCleared += originalCount - glog.V(3).Infof("INFO: [Cleanup] Cleared %d AppRenderFailed entries for user=%s, source=%s", originalCount, t.userID, t.sourceID) + count := 0 + failedAppNames := []string{} + for _, t := range targets { + if userData, ok := cm.cache.Users[t.userID]; ok { + if sourceData, ok := userData.Sources[t.sourceID]; ok { + if len(sourceData.AppRenderFailed) > 0 { + count += len(sourceData.AppRenderFailed) + for _, f := range sourceData.AppRenderFailed { + failedAppNames = append(failedAppNames, fmt.Sprintf("%s_%s_%s", t.userID, t.sourceID, f.AppInfo.AppEntry.Name)) } + sourceData.AppRenderFailed = make([]*types.AppRenderFailedData, 0) } } } - - case <-lockFailed: - glog.Error("DEBUG: [Cleanup] Failed to acquire write lock quickly, skipping cleanup to avoid reader starvation") - return } - if totalCleared > 0 { - glog.V(2).Infof("INFO: Periodic cleanup completed, cleared %d total AppRenderFailed entries in %v", totalCleared, time.Since(start)) - } else { - glog.V(3).Infof("DEBUG: No AppRenderFailed entries found during periodic cleanup (took %v)", time.Since(start)) + if count > 0 { + glog.Infof("INFO: [Cleanup] Cleared %d AppRenderFailed entries in %v, apps: %v", count, time.Since(start), failedAppNames) } } @@ -2258,16 +2659,16 @@ func (cm *CacheManager) HandlerEvent() cache.ResourceEventHandler { }, Handler: cache.ResourceEventHandlerFuncs{ AddFunc: 
func(obj interface{}) { - cm.ListUsers() + cm.ListUsers("Add") }, DeleteFunc: func(obj interface{}) { - cm.ListUsers() + cm.ListUsers("Delete") }, }, } } -func (cm *CacheManager) ListUsers() { +func (cm *CacheManager) ListUsers(opType string) { dynamicClient := client.Factory.Client() unstructuredUsers, err := dynamicClient.Resource(client.UserGVR).List(context.Background(), v1.ListOptions{}) if err != nil { @@ -2275,6 +2676,7 @@ func (cm *CacheManager) ListUsers() { return } + glog.Infof("[Cache] User watch handler, type: %s", opType) var userList = make([]*client.User, 0) for _, unstructuredUser := range unstructuredUsers.Items { @@ -2292,11 +2694,8 @@ func (cm *CacheManager) ListUsers() { userList = append(userList, user) } - if flag := cm.TryLock(); !flag { - glog.Warning("[TryLock] watch user list lock failed") - return - } - defer cm.Unlock() + cm.mutex.Lock() + defer cm.mutex.Unlock() if len(cm.cache.Users) == 0 { glog.V(2).Info("watch user list, cache user not exists") @@ -2327,3 +2726,100 @@ func (cm *CacheManager) ListUsers() { } } } + +func (cm *CacheManager) RemoveDeletedUser() { + cm.mutex.Lock() + defer cm.mutex.Unlock() + + var users []string + for _, user := range cm.cache.Users { + if user.UserInfo == nil { + continue + } + if !user.UserInfo.Exists { + users = append(users, user.UserInfo.Name) + } + } + + if len(users) == 0 { + return + } + + glog.Infof("[Cache] Remove deleted users: %v", users) + for _, u := range users { + delete(cm.cache.Users, u) + } +} + +func (cm *CacheManager) GetCachedData() string { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + var items []map[string]interface{} + + for un, uv := range cm.cache.Users { + var user = make(map[string]interface{}) + var ss = make(map[string]interface{}) + for sn, sv := range uv.Sources { + var apps = make(map[string]interface{}) + apps["latest"] = len(sv.AppInfoLatest) + apps["pending"] = len(sv.AppInfoLatestPending) + var pendings []string + if len(sv.AppInfoLatestPending) < 10 { + for _, 
pending := range sv.AppInfoLatestPending { + pendings = append(pendings, fmt.Sprintf("%s_%s", pending.AppInfo.AppEntry.Name, pending.AppInfo.AppEntry.Version)) + } + } + apps["pending_apps"] = pendings + + apps["failed"] = len(sv.AppRenderFailed) + var failes []string + if len(sv.AppRenderFailed) < 5 { + for _, fail := range sv.AppRenderFailed { + failes = append(failes, fmt.Sprintf("%s_%s", fail.AppInfo.AppEntry.Name, fail.AppInfo.AppEntry.Version)) + } + } + apps["failed_apps"] = failes + + apps["history"] = len(sv.AppInfoHistory) + apps["state"] = len(sv.AppStateLatest) + var status []string + if len(sv.AppStateLatest) > 0 { + for _, state := range sv.AppStateLatest { + status = append(status, fmt.Sprintf("%s_%s", state.Status.Name, state.Status.State)) + } + } + apps["state_apps"] = status + + ss[sn] = apps + } + user[un] = ss + items = append(items, user) + } + + result, _ := json.Marshal(items) + return string(result) +} + +func (cm *CacheManager) CompareAppStateMsg(userID string, sourceID string, appName string, checker CompareAppStateMsgFunc) { + cm.mutex.RLock() + defer cm.mutex.RUnlock() + + userData := cm.cache.Users[userID] + if userData == nil { + return + } + + sourceData := userData.Sources[sourceID] + if sourceData == nil { + return + } + + for _, appState := range sourceData.AppStateLatest { + if appState.Status.Name != appName { + continue + } + checker(appState) + return + } +} diff --git a/internal/v2/appinfo/datasender_app.go b/internal/v2/appinfo/datasender_app.go index 0267b44..5c8a648 100644 --- a/internal/v2/appinfo/datasender_app.go +++ b/internal/v2/appinfo/datasender_app.go @@ -96,7 +96,7 @@ func loadConfig() Config { } // SendAppInfoUpdate sends app info update to NATS -func (ds *DataSender) SendAppInfoUpdate(update types.AppInfoUpdate) error { +func (ds *DataSender) SendAppInfoUpdate(update types.AppInfoUpdate, trace string) error { if !ds.enabled { glog.V(3).Info("NATS data sender is disabled, skipping message send") return nil @@ 
-115,10 +115,10 @@ func (ds *DataSender) SendAppInfoUpdate(update types.AppInfoUpdate) error { subject := fmt.Sprintf("%s.%s", ds.subject, update.User) // Log before sending - if len(string(data)) > 500 { - glog.V(2).Infof("App - Sending app info update to NATS subject '%s': %s", subject, string(data)[:500]) + if len(string(data)) > 800 { + glog.V(2).Infof("App - Sending app info update to NATS subject '%s'(trace: %s): %s", subject, trace, string(data)[:800]) } else { - glog.V(2).Infof("App - Sending app info update to NATS subject '%s': %s", subject, string(data)) + glog.V(2).Infof("App - Sending app info update to NATS subject '%s'(trace: %s): %s", subject, trace, string(data)) } // Send message to NATS diff --git a/internal/v2/appinfo/datawatcher_app.go b/internal/v2/appinfo/datawatcher_app.go index 365b345..b3f253f 100644 --- a/internal/v2/appinfo/datawatcher_app.go +++ b/internal/v2/appinfo/datawatcher_app.go @@ -30,6 +30,10 @@ type DataWatcher struct { activeHashCalculations map[string]bool hashMutex sync.Mutex + // Dirty users tracking for deferred hash calculation + dirtyUsers map[string]bool + dirtyUsersMutex sync.Mutex + // Metrics - using atomic operations for thread safety totalAppsProcessed int64 totalAppsMoved int64 @@ -46,11 +50,18 @@ func NewDataWatcher(cacheManager *CacheManager, hydrator *Hydrator, dataSender * stopChan: make(chan struct{}), isRunning: 0, // Initialize as false activeHashCalculations: make(map[string]bool), + dirtyUsers: make(map[string]bool), } } // Start begins the data watching process func (dw *DataWatcher) Start(ctx context.Context) error { + return dw.StartWithOptions(ctx, true) +} + +// StartWithOptions begins the data watching process with options +// If enableWatchLoop is false, the periodic watchLoop is not started (used when serial pipeline handles processing) +func (dw *DataWatcher) StartWithOptions(ctx context.Context, enableWatchLoop bool) error { if atomic.LoadInt32(&dw.isRunning) == 1 { return 
fmt.Errorf("DataWatcher is already running") } @@ -64,10 +75,13 @@ func (dw *DataWatcher) Start(ctx context.Context) error { } atomic.StoreInt32(&dw.isRunning, 1) - glog.Infof("Starting DataWatcher with interval: %v", time.Duration(atomic.LoadInt64((*int64)(&dw.interval)))) - // Start the monitoring goroutine - go dw.watchLoop(ctx) + if enableWatchLoop { + glog.Infof("Starting DataWatcher with interval: %v", time.Duration(atomic.LoadInt64((*int64)(&dw.interval)))) + go dw.watchLoop(ctx) + } else { + glog.Infof("Starting DataWatcher in passive mode (serial pipeline handles processing)") + } return nil } @@ -89,6 +103,7 @@ func (dw *DataWatcher) IsRunning() bool { } // watchLoop is the main monitoring loop +// ~ not used func (dw *DataWatcher) watchLoop(ctx context.Context) { glog.Infof("DataWatcher monitoring loop started") defer glog.Infof("DataWatcher monitoring loop stopped") @@ -97,7 +112,7 @@ func (dw *DataWatcher) watchLoop(ctx context.Context) { defer ticker.Stop() // Run once immediately - dw.processCompletedApps() + dw.processCompletedApps() // not used for { select { @@ -108,7 +123,7 @@ func (dw *DataWatcher) watchLoop(ctx context.Context) { glog.Infof("DataWatcher stopped due to explicit stop") return case <-ticker.C: - dw.processCompletedApps() + dw.processCompletedApps() // not used } } } @@ -134,8 +149,7 @@ func (dw *DataWatcher) processCompletedApps() { // Get all users data from cache manager with timeout var allUsersData map[string]*types.UserData - // Use fallback method with TryRLock to avoid blocking - allUsersData = dw.cacheManager.GetAllUsersDataWithFallback() + allUsersData = dw.cacheManager.GetAllUsersData() // not used if len(allUsersData) == 0 { glog.Infof("DataWatcher: No users data found, processing cycle completed") @@ -159,7 +173,7 @@ func (dw *DataWatcher) processCompletedApps() { // Process batch when it's full or we've reached the end if len(userBatch) >= batchSize || userCount == len(allUsersData) { - batchProcessed, batchMoved := 
dw.processUserBatch(ctx, userBatch, userDataBatch) + batchProcessed, batchMoved := dw.processUserBatch(ctx, userBatch, userDataBatch) // not used totalProcessed += batchProcessed totalMoved += batchMoved @@ -211,7 +225,7 @@ func (dw *DataWatcher) processUserBatch(ctx context.Context, userIDs []string, u } glog.V(3).Infof("DataWatcher: Processing user %d/%d in batch: %s", i+1, len(userIDs), userID) - processed, moved := dw.processUserData(userID, userData) + processed, moved := dw.processUserData(userID, userData) // not used totalProcessed += processed totalMoved += moved glog.V(2).Infof("DataWatcher: User %s completed: %d processed, %d moved", userID, processed, moved) @@ -221,6 +235,7 @@ func (dw *DataWatcher) processUserBatch(ctx context.Context, userIDs []string, u } // processUserData processes a single user's data +// ~ not used func (dw *DataWatcher) processUserData(userID string, userData *types.UserData) (int64, int64) { if userData == nil { return 0, 0 @@ -237,51 +252,14 @@ func (dw *DataWatcher) processUserData(userID string, userData *types.UserData) totalMoved := int64(0) for sourceID, sourceData := range sourceRefs { - processed, moved := dw.processSourceData(userID, sourceID, sourceData) + processed, moved := dw.processSourceData(userID, sourceID, sourceData) // not used totalProcessed += processed totalMoved += moved } - // Step 3: Calculate hash if apps were moved OR if hash is empty - shouldCalculateHash := totalMoved > 0 || userData.Hash == "" - - if shouldCalculateHash { - if totalMoved > 0 { - glog.Infof("DataWatcher: %d apps moved for user %s, scheduling hash calculation", totalMoved, userID) - } else { - glog.Infof("DataWatcher: Hash is empty for user %s, scheduling hash calculation", userID) - } - - // Schedule hash calculation in a separate goroutine without setting the flag here - go func() { - // Check if hash calculation is already in progress for this user - dw.hashMutex.Lock() - if dw.activeHashCalculations[userID] { - 
dw.hashMutex.Unlock() - glog.Warningf("DataWatcher: Hash calculation already in progress for user %s, skipping", userID) - return - } - dw.activeHashCalculations[userID] = true - dw.hashMutex.Unlock() - - defer func() { - // Clean up tracking when done - dw.hashMutex.Lock() - delete(dw.activeHashCalculations, userID) - dw.hashMutex.Unlock() - glog.V(3).Infof("DataWatcher: Hash calculation tracking cleaned up for user %s", userID) - }() - - // Wait a short time to ensure all source processing locks are released - time.Sleep(100 * time.Millisecond) - glog.V(3).Infof("DataWatcher: Starting hash calculation for user %s", userID) - - // Call the hash calculation function directly - dw.calculateAndSetUserHashDirect(userID, userData) - }() - } else { - glog.V(3).Infof("DataWatcher: No apps moved and hash exists for user %s, skipping hash calculation", userID) - } + // Hash calculation is deferred to Pipeline Phase 5. + // The caller (Pipeline.phaseHydrateApps) tracks affected users and + // Phase 5 will calculate hashes for all affected users in one pass. return totalProcessed, totalMoved } @@ -349,136 +327,37 @@ func (dw *DataWatcher) calculateAndSetUserHashWithRetry(userID string, userData glog.Errorf("DataWatcher: Hash calculation failed after %d attempts for user %s", maxRetries, userID) } -// calculateAndSetUserHashDirect calculates hash without tracking (used internally by goroutines) +// calculateAndSetUserHashDirect calculates and updates hash for a single user. +// Does NOT call ForceSync — the caller (Pipeline Phase 5) is responsible for syncing. 
func (dw *DataWatcher) calculateAndSetUserHashDirect(userID string, userData *types.UserData) bool { glog.V(3).Infof("DataWatcher: Starting direct hash calculation for user %s", userID) - // Get the original user data from cache manager to ensure we have the latest reference originalUserData := dw.cacheManager.GetUserData(userID) if originalUserData == nil { glog.Errorf("DataWatcher: Failed to get user data from cache manager for user %s", userID) return false } - // Create snapshot for hash calculation without holding any locks - glog.V(3).Infof("DataWatcher: Creating user data snapshot for user %s", userID) snapshot, err := utils.CreateUserDataSnapshot(userID, originalUserData) if err != nil { glog.Errorf("DataWatcher: Failed to create user data snapshot for user %s: %v", userID, err) return false } - glog.V(4).Infof("DataWatcher: Calculating hash for user %s", userID) - // Calculate hash using the snapshot newHash, err := utils.CalculateUserDataHash(snapshot) if err != nil { glog.Errorf("DataWatcher: Failed to calculate hash for user %s: %v", userID, err) return false } - // Get current hash for comparison currentHash := originalUserData.Hash - glog.V(3).Infof("DataWatcher: Hash comparison for user %s - current: '%s', new: '%s'", userID, currentHash, newHash) - if currentHash == newHash { glog.V(2).Infof("DataWatcher: Hash unchanged for user %s: %s", userID, newHash) return true } glog.V(2).Infof("DataWatcher: Hash changed for user %s: %s -> %s", userID, currentHash, newHash) - - // Use a single write lock acquisition with timeout to avoid deadlock - writeTimeout := 5 * time.Second - writeLockAcquired := make(chan bool, 1) - writeLockError := make(chan error, 1) - cancel := make(chan bool, 1) - - go func() { - defer func() { - if r := recover(); r != nil { - glog.Errorf("DataWatcher: Panic during write lock acquisition for user %s: %v", userID, r) - writeLockError <- fmt.Errorf("panic during write lock acquisition: %v", r) - } - }() - - 
glog.V(3).Infof("DataWatcher: Attempting to acquire write lock for user %s", userID) - glog.V(3).Infof("[LOCK] dw.cacheManager.mutex.TryLock() @439 Start") - if !dw.cacheManager.mutex.TryLock() { - glog.Warningf("DataWatcher: Write lock not available for user %s, skipping hash update", userID) - writeLockError <- fmt.Errorf("write lock not available") - return - } - defer func() { - dw.cacheManager.mutex.Unlock() - glog.V(3).Infof("[LOCK] dw.cacheManager.mutex.Unlock() @453 Start") - glog.V(3).Infof("DataWatcher: Write lock released for user %s", userID) - }() - - // Check if cancelled before sending signal - select { - case <-cancel: - glog.V(3).Infof("DataWatcher: Write lock acquisition cancelled for user %s", userID) - return - default: - } - - glog.V(3).Infof("DataWatcher: Write lock acquired for user %s", userID) - glog.V(3).Infof("[LOCK] dw.cacheManager.mutex.Lock() @439 Success") - - // Send signal and wait for processing - select { - case writeLockAcquired <- true: - // Successfully sent signal, wait for cancellation or completion - <-cancel - case <-cancel: - glog.V(3).Infof("DataWatcher: Write lock acquisition cancelled before signal for user %s", userID) - } - }() - - select { - case <-writeLockAcquired: - // Write lock acquired successfully - glog.V(3).Infof("DataWatcher: Write lock acquired for hash update, user %s", userID) - - // Update hash and release lock immediately - originalUserData.Hash = newHash - glog.V(3).Infof("DataWatcher: Hash updated in memory for user %s", userID) - - // Cancel the goroutine to release the lock - close(cancel) - - case err := <-writeLockError: - glog.Errorf("DataWatcher: Error acquiring write lock for user %s: %v", userID, err) - close(cancel) - return false - - case <-time.After(writeTimeout): - glog.Errorf("DataWatcher: Timeout acquiring write lock for hash update, user %s", userID) - close(cancel) - return false - } - - glog.V(3).Infof("DataWatcher: Hash updated for user %s", userID) - - // Verification: Check if 
the hash was actually updated - if glog.V(2) { - verifyUserData := dw.cacheManager.GetUserData(userID) - if verifyUserData != nil { - verifyHash := verifyUserData.Hash - glog.V(2).Infof("DataWatcher: Verification - hash = '%s' for user %s", verifyHash, userID) - } else { - glog.Errorf("DataWatcher: Verification failed - CacheManager.GetUserData returned nil for user %s", userID) - } - } - - // Trigger force sync to persist the hash change - glog.V(3).Infof("DataWatcher: Starting force sync for user %s", userID) - if err := dw.cacheManager.ForceSync(); err != nil { - glog.V(4).Infof("DataWatcher: Failed to force sync after hash update for user %s: %v", userID, err) - return false - } else { - glog.V(2).Infof("DataWatcher: Force sync completed after hash update for user %s", userID) - } + dw.cacheManager.SetUserHash(userID, newHash) return true } @@ -511,39 +390,14 @@ func (dw *DataWatcher) calculateAndSetUserHashAsync(userID string, userData *typ } // processSourceData processes a single source's data for completed hydration +// ~ not used func (dw *DataWatcher) processSourceData(userID, sourceID string, sourceData *types.SourceData) (int64, int64) { if sourceData == nil { return 0, 0 } - var pendingApps []*types.AppInfoLatestPendingData - var appInfoLatest []*types.AppInfoLatestData - // Step 1: Quick check and data copy with minimal lock time - func() { - glog.V(3).Info("[LOCK] dw.cacheManager.mutex.TryRLock() @660 Start") - if !dw.cacheManager.mutex.TryRLock() { - glog.V(3).Infof("[TryRLock] processSourceData: Read lock not available for user: %s, source: %s, skipping", userID, sourceID) - return - } - defer func() { - dw.cacheManager.mutex.RUnlock() - glog.V(3).Infof("[LOCK] dw.cacheManager.mutex.RUnlock() @660 End") - }() - - // Quick check - if no pending apps, exit early - if len(sourceData.AppInfoLatestPending) == 0 { - return - } - - // Copy references to pending apps for processing - pendingApps = make([]*types.AppInfoLatestPendingData, 
len(sourceData.AppInfoLatestPending)) - copy(pendingApps, sourceData.AppInfoLatestPending) - - // Copy references to existing AppInfoLatest - appInfoLatest = make([]*types.AppInfoLatestData, len(sourceData.AppInfoLatest)) - copy(appInfoLatest, sourceData.AppInfoLatest) - }() + pendingApps, _ := dw.cacheManager.SnapshotSourcePending(userID, sourceID) // Early exit if no pending apps if len(pendingApps) == 0 { @@ -583,135 +437,38 @@ func (dw *DataWatcher) processSourceData(userID, sourceID string, sourceData *ty } glog.Infof("DataWatcher: user=%s source=%s completed=%d/%d apps=[%s]", userID, sourceID, len(completedApps), len(pendingApps), strings.Join(completedIDs, ",")) - // Step 3: Try to acquire write lock non-blocking and move completed apps - lockStartTime := time.Now() - - // Try to acquire write lock non-blocking with cancellation support - lockAcquired := make(chan bool, 1) - lockCancel := make(chan bool, 1) - - go func() { - glog.V(3).Info("[LOCK] dw.cacheManager.mutex.TryLock() @716 Start") - if !dw.cacheManager.mutex.TryLock() { - glog.Warningf("DataWatcher: Write lock not available for user %s, source %s, skipping app move", userID, sourceID) - return - } - defer func() { - dw.cacheManager.mutex.Unlock() - glog.V(3).Info("[LOCK] dw.cacheManager.mutex.Unlock() @725 Start") - }() - - // Check if cancelled before sending signal - select { - case <-lockCancel: - glog.V(3).Infof("DataWatcher: Write lock acquisition cancelled for user=%s, source=%s", userID, sourceID) - return - default: - } - - glog.V(3).Info("[LOCK] dw.cacheManager.mutex.Lock() @716 Success") - - // Send signal and wait for processing - select { - case lockAcquired <- true: - // Successfully sent signal, wait for cancellation - <-lockCancel - case <-lockCancel: - glog.V(3).Infof("DataWatcher: Write lock acquisition cancelled before signal for user=%s, source=%s", userID, sourceID) + // Step 3: Move completed apps from pending to latest via CacheManager + movedCount := int64(0) + for _, 
completedApp := range completedApps { + latestData := dw.convertPendingToLatest(completedApp) + if latestData == nil { + continue } - }() - - // Use a short timeout to avoid blocking too long - select { - case <-lockAcquired: - glog.V(3).Infof("DataWatcher: Write lock acquired for user=%s, source=%s", userID, sourceID) - - defer func() { - totalLockTime := time.Since(lockStartTime) - glog.V(3).Infof("DataWatcher: Write lock released after %v for user=%s, source=%s", totalLockTime, userID, sourceID) - // Cancel the goroutine to release the lock - close(lockCancel) - }() - - // Move completed apps from pending to latest - movedCount := int64(0) - for _, completedApp := range completedApps { - // Convert to AppInfoLatestData - latestData := dw.convertPendingToLatest(completedApp) - if latestData != nil { - // Check if app with same name already exists in AppInfoLatest - appName := dw.getAppName(completedApp) - existingIndex := -1 - - // Find existing app with same name - for i, existingApp := range sourceData.AppInfoLatest { - if existingApp != nil { - existingAppName := dw.getAppNameFromLatest(existingApp) - if existingAppName == appName { - existingIndex = i - break - } - } - } - - if existingIndex >= 0 { - - if latestData.AppInfo.AppEntry.Version != sourceData.AppInfoLatest[existingIndex].AppInfo.AppEntry.Version { - // Send system notification for new app ready - dw.sendNewAppReadyNotification(userID, completedApp, sourceID) - glog.V(3).Infof("DataWatcher: Sent system notification for new app ready: %s", appName) - } - - // Replace existing app with same name - sourceData.AppInfoLatest[existingIndex] = latestData - glog.V(3).Infof("DataWatcher: Replaced existing app with same name: %s (index: %d)", appName, existingIndex) - - } else { - // Add new app if no existing app with same name - sourceData.AppInfoLatest = append(sourceData.AppInfoLatest, latestData) - glog.V(2).Infof("DataWatcher: Added new app to latest: %s", appName) - // Send system notification for new 
app ready - dw.sendNewAppReadyNotification(userID, completedApp, sourceID) - } - - movedCount++ + appID := dw.getAppID(completedApp) + appName := dw.getAppName(completedApp) - } + oldVersion, replaced, ok := dw.cacheManager.UpsertLatestAndRemovePending(userID, sourceID, latestData, appID, appName) + if !ok { + continue } - // Remove completed apps from pending list - if movedCount > 0 { - newPendingList := make([]*types.AppInfoLatestPendingData, 0, len(sourceData.AppInfoLatestPending)-int(movedCount)) - completedAppIDs := make(map[string]bool) - - // Create a map of completed app IDs for efficient lookup - for _, completedApp := range completedApps { - appID := dw.getAppID(completedApp) - if appID != "" { - completedAppIDs[appID] = true - } + if replaced { + newVersion := "" + if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { + newVersion = latestData.AppInfo.AppEntry.Version } - - // Filter out completed apps from pending list - for _, pendingApp := range sourceData.AppInfoLatestPending { - appID := dw.getAppID(pendingApp) - if !completedAppIDs[appID] { - newPendingList = append(newPendingList, pendingApp) - } + if oldVersion != newVersion { + dw.sendNewAppReadyNotification(userID, completedApp, sourceID) // ~ not used } - - sourceData.AppInfoLatestPending = newPendingList - glog.Infof("DataWatcher: Updated pending list: %d -> %d apps for user=%s, source=%s", - len(sourceData.AppInfoLatestPending)+int(movedCount), len(sourceData.AppInfoLatestPending), userID, sourceID) + glog.V(3).Infof("DataWatcher: Replaced existing app: %s", appName) + } else { + glog.V(2).Infof("DataWatcher: Added new app to latest: %s", appName) + dw.sendNewAppReadyNotification(userID, completedApp, sourceID) // ~ not used } - - return int64(len(pendingApps)), movedCount - - case <-time.After(2 * time.Second): - close(lockCancel) // Cancel the goroutine to release the lock - glog.V(3).Infof("DataWatcher: Skipping write lock acquisition for user=%s, source=%s (timeout after 
2s) - will retry in next cycle", userID, sourceID) - return int64(len(pendingApps)), 0 + movedCount++ } + + return int64(len(pendingApps)), movedCount } // isAppHydrationCompletedWithTimeout checks if app hydration is completed with timeout protection @@ -1160,7 +917,7 @@ func (dw *DataWatcher) ForceCalculateAllUsersHash() error { glog.V(3).Infof("DataWatcher: Force calculating hash for all users") // Get all users data - allUsersData := dw.cacheManager.GetAllUsersData() + allUsersData := dw.cacheManager.GetAllUsersData() // not used if len(allUsersData) == 0 { return fmt.Errorf("no users found in cache") } @@ -1175,6 +932,31 @@ func (dw *DataWatcher) ForceCalculateAllUsersHash() error { return nil } +// MarkUserDirty marks a user as needing hash recalculation. +// Called by event-driven paths (e.g. DataWatcherState) that modify user data +// outside the Pipeline cycle. The dirty users will be picked up by Pipeline Phase 5. +func (dw *DataWatcher) MarkUserDirty(userID string) { + dw.dirtyUsersMutex.Lock() + defer dw.dirtyUsersMutex.Unlock() + dw.dirtyUsers[userID] = true + glog.V(3).Infof("DataWatcher: Marked user %s as dirty for deferred hash calculation", userID) +} + +// CollectAndClearDirtyUsers returns all dirty user IDs and clears the set. +// Called by Pipeline Phase 5 to collect users that need hash recalculation +// from event-driven paths. 
+func (dw *DataWatcher) CollectAndClearDirtyUsers() map[string]bool { + dw.dirtyUsersMutex.Lock() + defer dw.dirtyUsersMutex.Unlock() + if len(dw.dirtyUsers) == 0 { + return nil + } + result := dw.dirtyUsers + dw.dirtyUsers = make(map[string]bool) + glog.V(3).Infof("DataWatcher: Collected %d dirty users for hash calculation", len(result)) + return result +} + // getAppVersion extracts app version from pending app data func (dw *DataWatcher) getAppVersion(pendingApp *types.AppInfoLatestPendingData) string { if pendingApp == nil { @@ -1244,3 +1026,54 @@ func (dw *DataWatcher) sendNewAppReadyNotification(userID string, completedApp * glog.V(2).Infof("DataWatcher: Successfully sent new app ready notification for app %s (version: %s, source: %s)", appName, appVersion, sourceID) } } + +// ProcessSingleAppToLatest moves a single completed pending app to AppInfoLatest +// Returns true if the app was successfully moved +func (dw *DataWatcher) ProcessSingleAppToLatest(userID, sourceID string, pendingApp *types.AppInfoLatestPendingData) bool { + if pendingApp == nil { + return false + } + + // Check hydration completion + if dw.hydrator != nil && !dw.hydrator.isAppHydrationComplete(pendingApp) { + return false + } + + // Convert to latest data + latestData := dw.convertPendingToLatest(pendingApp) + if latestData == nil { + return false + } + + appID := dw.getAppID(pendingApp) + appName := dw.getAppName(pendingApp) + glog.V(2).Infof("Pipeline Phase 2: ProcessSingleAppToLatest user=%s, source=%s, id=%s, name=%s", userID, sourceID, appID, appName) + + oldVersion, replaced, ok := dw.cacheManager.UpsertLatestAndRemovePending(userID, sourceID, latestData, appID, appName) + if !ok { + return false + } + + if replaced { + newVersion := "" + if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { + newVersion = latestData.AppInfo.AppEntry.Version + } + if oldVersion != newVersion { + dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) // ~ ProcesSingleAppToLatest 
+ } + glog.V(2).Infof("ProcessSingleAppToLatest: replaced existing app %s (user=%s, source=%s)", appName, userID, sourceID) + } else { + glog.V(2).Infof("ProcessSingleAppToLatest: added new app %s (user=%s, source=%s)", appName, userID, sourceID) + dw.sendNewAppReadyNotification(userID, pendingApp, sourceID) // ~ ProcesSingleAppToLatest + } + + atomic.AddInt64(&dw.totalAppsMoved, 1) + glog.Infof("ProcessSingleAppToLatest: successfully moved app %s to Latest (user=%s, source=%s)", appName, userID, sourceID) + return true +} + +// CalculateAndSetUserHashDirect is a public wrapper for calculateAndSetUserHashDirect +func (dw *DataWatcher) CalculateAndSetUserHashDirect(userID string, userData *types.UserData) bool { + return dw.calculateAndSetUserHashDirect(userID, userData) +} diff --git a/internal/v2/appinfo/datawatcher_repo.go b/internal/v2/appinfo/datawatcher_repo.go index dff7ba3..2f9c8de 100644 --- a/internal/v2/appinfo/datawatcher_repo.go +++ b/internal/v2/appinfo/datawatcher_repo.go @@ -138,11 +138,36 @@ func (dwr *DataWatcherRepo) Start() error { glog.V(3).Info("Starting data watcher with 2-minute intervals") // Start the monitoring goroutine - go dwr.monitorStateChanges() + go dwr.monitorStateChanges() // not used return nil } +// StartWithOptions starts with options, if enablePolling is false, the periodic polling is not started +func (dwr *DataWatcherRepo) StartWithOptions(enablePolling bool) error { + dwr.mu.Lock() + defer dwr.mu.Unlock() + + if dwr.isRunning { + return fmt.Errorf("DataWatcherRepo is already running") + } + + dwr.isRunning = true + glog.V(3).Info("Starting DataWatcherRepo in passive mode (serial pipeline handles processing)") + + return nil +} + +// ProcessOnce executes one round of state change processing, called by Pipeline Phase 3. +// Returns the set of affected user IDs whose data was modified. 
+func (dwr *DataWatcherRepo) ProcessOnce() map[string]bool { + if !dwr.isRunning { + return nil + } + + return dwr.processStateChanges() +} + // Stop stops the periodic state checking process func (dwr *DataWatcherRepo) Stop() error { dwr.mu.Lock() @@ -177,14 +202,14 @@ func (dwr *DataWatcherRepo) monitorStateChanges() { glog.V(3).Info("State change monitoring started") // Process immediately on start - if err := dwr.processStateChanges(); err != nil { + if err := dwr.processStateChanges(); err != nil { // not used glog.Errorf("Error processing state changes on startup: %v", err) } for { select { case <-dwr.ticker.C: - if err := dwr.processStateChanges(); err != nil { + if err := dwr.processStateChanges(); err != nil { // not used glog.Errorf("Error processing state changes: %v", err) } case <-dwr.stopChannel: @@ -195,30 +220,29 @@ func (dwr *DataWatcherRepo) monitorStateChanges() { } // processStateChanges fetches and processes new state changes -func (dwr *DataWatcherRepo) processStateChanges() error { - glog.V(3).Infof("Processing state changes after ID: %d", dwr.lastProcessedID) +func (dwr *DataWatcherRepo) processStateChanges() map[string]bool { + glog.V(2).Infof("Processing state changes after ID: %d", dwr.lastProcessedID) + affectedUsers := make(map[string]bool) - // Fetch new state changes from API stateChanges, err := dwr.fetchStateChanges(dwr.lastProcessedID) if err != nil { - return fmt.Errorf("failed to fetch state changes: %w", err) + glog.Errorf("Failed to fetch state changes: %v", err) + return affectedUsers } if len(stateChanges) == 0 { - glog.V(3).Info("No new state changes found") - return nil + glog.V(2).Info("No new state changes found") + return affectedUsers } glog.V(2).Infof("Found %d new state changes", len(stateChanges)) - // Sort state changes by ID to ensure proper order sort.Slice(stateChanges, func(i, j int) bool { return stateChanges[i].ID < stateChanges[j].ID }) glog.V(2).Info("State changes sorted by ID, processing in order...") - // 
Process state changes in order by ID var lastProcessedID int64 for _, change := range stateChanges { if err := dwr.processStateChange(change); err != nil { @@ -226,17 +250,30 @@ func (dwr *DataWatcherRepo) processStateChanges() error { continue } + // // Track affected users from each change type + // if change.AppData != nil && change.AppData.UserID != "" { + // affectedUsers[change.AppData.UserID] = true + // } + // if change.Type == "image_info_updated" { + // // Image updates affect all users + // allUsers := dwr.cacheManager.GetAllUsersData() + // for userID := range allUsers { + // affectedUsers[userID] = true + // } + // } + lastProcessedID = change.ID } - // Update the last processed ID in Redis + dwr.lastProcessedID = lastProcessedID + ctx := context.Background() err = dwr.redisClient.client.Set(ctx, "datawatcher:last_processed_id", strconv.FormatInt(lastProcessedID, 10), 0).Err() if err != nil { glog.Errorf("Failed to update last processed ID in Redis: %v", err) } - return nil + return affectedUsers } // fetchStateChanges calls the /state-changes API to get new state changes @@ -312,7 +349,7 @@ func (dwr *DataWatcherRepo) handleAppUploadCompleted(change *StateChange) error shouldUpdate := dwr.shouldUpdateAppInCache(change.AppData.UserID, change.AppData.Source, change.AppData.AppName, appInfo) if !shouldUpdate { - glog.V(3).Infof("App %s already exists in cache with same or newer version for user %s, source %s", + glog.V(2).Infof("App %s already exists in cache with same or newer version for user %s, source %s", change.AppData.AppName, change.AppData.UserID, change.AppData.Source) return nil } @@ -356,17 +393,7 @@ func (dwr *DataWatcherRepo) handleImageInfoUpdated(change *StateChange) error { updatedCount := dwr.updateImageInfoInCache(imageName, updatedImageInfo) glog.V(3).Infof("Updated image info for %s in %d cache entries", imageName, updatedCount) - // Step 3: Trigger hash calculation for all users - if dwr.dataWatcher != nil { - if err := 
dwr.dataWatcher.ForceCalculateAllUsersHash(); err != nil { - glog.Errorf("Failed to trigger hash calculation for all users: %v", err) - return fmt.Errorf("failed to trigger hash calculation: %w", err) - } - glog.V(3).Info("Successfully triggered hash calculation for all users after image update") - } else { - glog.V(3).Info("DataWatcher not available, skipping hash calculation") - } - + // Hash calculation is deferred to Pipeline Phase 5. glog.V(2).Infof("Successfully handled image info updated for image: %s", imageName) return nil } diff --git a/internal/v2/appinfo/datawatcher_state.go b/internal/v2/appinfo/datawatcher_state.go index 9d6e85a..a034e9c 100644 --- a/internal/v2/appinfo/datawatcher_state.go +++ b/internal/v2/appinfo/datawatcher_state.go @@ -21,6 +21,8 @@ import ( "github.com/nats-io/nats.go" ) +var nanoTimeLayout = "2006-01-02T15:04:05.999999999Z" // 2006-01-02T15:04:05.000000000Z + // EntranceStatus represents the status of an entrance type EntranceStatus struct { ID string `json:"id"` // ID extracted from URL's first segment after splitting by "." 
@@ -59,6 +61,7 @@ type AppStateMessage struct { State string `json:"state"` User string `json:"user"` Progress string `json:"progress"` + MarketSource string `json:"marketSource"` Reason string `json:"reason"` Message string `json:"message"` EntranceStatuses []EntranceStatus `json:"entranceStatuses"` @@ -158,30 +161,27 @@ func (dw *DataWatcherState) resolveInvisibleFlag(raw *bool, entranceName, appNam // fetchInvisibleFromAppService fetches invisible flag from app-service API's spec.entrances // Uses caching to avoid repeated API calls for the same app func (dw *DataWatcherState) fetchInvisibleFromAppService(appName, userID, entranceName string) (bool, error) { - // Check cache first (using TryRLock to avoid blocking) cacheKey := fmt.Sprintf("%s:%s", userID, appName) - if dw.appServiceCacheMutex.TryRLock() { - if appCache, exists := dw.appServiceCache[cacheKey]; exists { - if invisible, found := appCache[entranceName]; found { - dw.appServiceCacheMutex.RUnlock() - glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - using cached invisible=%t for entrance %s (app=%s, user=%s)", - invisible, entranceName, appName, userID) - return invisible, nil - } + + // Check cache first (short read lock) + dw.appServiceCacheMutex.RLock() + if appCache, exists := dw.appServiceCache[cacheKey]; exists { + if invisible, found := appCache[entranceName]; found { + dw.appServiceCacheMutex.RUnlock() + glog.V(3).Infof("fetchInvisibleFromAppService - cached invisible=%t for entrance %s (app=%s, user=%s)", + invisible, entranceName, appName, userID) + return invisible, nil } - dw.appServiceCacheMutex.RUnlock() - } else { - glog.Warningf("[TryRLock] DEBUG: fetchInvisibleFromAppService - read lock not available, skipping cache check for entrance %s (app=%s, user=%s)", - entranceName, appName, userID) } + dw.appServiceCacheMutex.RUnlock() - // Fetch from API + // Fetch from API (no lock held) host := getEnvOrDefault("APP_SERVICE_SERVICE_HOST", "localhost") port := 
getEnvOrDefault("APP_SERVICE_SERVICE_PORT", "80") url := fmt.Sprintf("http://%s:%s/app-service/v1/all/apps", host, port) client := &http.Client{ - Timeout: 5 * time.Second, // Short timeout to avoid blocking + Timeout: 5 * time.Second, } resp, err := client.Get(url) @@ -204,10 +204,8 @@ func (dw *DataWatcherState) fetchInvisibleFromAppService(appName, userID, entran return false, fmt.Errorf("failed to parse app-service response: %v", err) } - // Find the app matching appName and userID for _, app := range apps { if app.Spec.Name == appName && app.Spec.Owner == userID { - // Find the entrance in spec.entrances first var foundInvisible bool var invisibleValue bool for _, specEntrance := range app.Spec.Entrances { @@ -222,22 +220,18 @@ func (dw *DataWatcherState) fetchInvisibleFromAppService(appName, userID, entran return false, fmt.Errorf("entrance %s not found in spec.entrances for app %s", entranceName, appName) } - // Cache all entrances for this app to avoid future API calls (using TryLock to avoid blocking) - if dw.appServiceCacheMutex.TryLock() { - if dw.appServiceCache[cacheKey] == nil { - dw.appServiceCache[cacheKey] = make(map[string]bool) - } - for _, specEntrance := range app.Spec.Entrances { - dw.appServiceCache[cacheKey][specEntrance.Name] = specEntrance.Invisible - } - dw.appServiceCacheMutex.Unlock() - glog.V(3).Infof("DEBUG: fetchInvisibleFromAppService - fetched and cached invisible=%t for entrance %s (app=%s, user=%s)", - invisibleValue, entranceName, appName, userID) - } else { - glog.Warningf("[TryLock] DEBUG: fetchInvisibleFromAppService - write lock not available, skipping cache update for entrance %s (app=%s, user=%s)", - entranceName, appName, userID) + // Write cache (separate write lock, no read lock held) + dw.appServiceCacheMutex.Lock() + if dw.appServiceCache[cacheKey] == nil { + dw.appServiceCache[cacheKey] = make(map[string]bool) + } + for _, specEntrance := range app.Spec.Entrances { + dw.appServiceCache[cacheKey][specEntrance.Name] = 
specEntrance.Invisible } + dw.appServiceCacheMutex.Unlock() + glog.V(3).Infof("fetchInvisibleFromAppService - fetched and cached invisible=%t for entrance %s (app=%s, user=%s)", + invisibleValue, entranceName, appName, userID) return invisibleValue, nil } } @@ -375,7 +369,7 @@ func (dw *DataWatcherState) startNatsConnection() error { } // handleMessage processes incoming NATS messages -func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { +func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // + glog.V(2).Infof("State - Received message from NATS subject %s: %s", msg.Subject, string(msg.Data)) var appStateMsg AppStateMessage @@ -503,7 +497,7 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // This avoids blocking NATS message processing glog.V(2).Infof("Delaying pending state message for app=%s, user=%s, opID=%s - failed to acquire lock", appStateMsg.Name, appStateMsg.User, appStateMsg.OpID) - dw.addDelayedMessage(msg, appStateMsg) + dw.addDelayedMessage(msg, appStateMsg) // install + pending return } if hasPendingTask { @@ -511,7 +505,7 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // This avoids matching to the wrong source when a new install starts glog.V(2).Infof("Delaying pending state message for app=%s, user=%s, opID=%s - found pending/running install task", appStateMsg.Name, appStateMsg.User, appStateMsg.OpID) - dw.addDelayedMessage(msg, appStateMsg) + dw.addDelayedMessage(msg, appStateMsg) // install + pending return } } @@ -539,7 +533,7 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { // Task not found in database, delay to wait for task to be persisted glog.Errorf("Delaying pending state message for app=%s, user=%s, opID=%s - task not found in DB, waiting for persistence", appStateMsg.Name, appStateMsg.User, appStateMsg.OpID) - dw.addDelayedMessage(msg, appStateMsg) + dw.addDelayedMessage(msg, appStateMsg) // install + pending return } // Task found in database, can proceed (storeStateToCache will use 
OpID to query from DB) @@ -549,78 +543,132 @@ func (dw *DataWatcherState) handleMessage(msg *nats.Msg) { } } - userData := dw.cacheManager.getUserData(appStateMsg.User) - if userData == nil { - glog.V(2).Infof("User data not found for user %s", appStateMsg.User) - return - } - - for _, sourceData := range userData.Sources { - for _, appState := range sourceData.AppStateLatest { - if appState.Status.Name == appStateMsg.Name { // && appState.Status.State == appStateMsg.State - - /** - * [Mandatory Sync Whitelist] - * The cases below define critical state transition scenarios that must be processed. - * - * Background: - * When a user performs an action in the UI (e.g., canceling installation/download) or when an app lifecycle event completes (e.g., installation finished, uninstallation finished), - * the final state pushed by NATS (appStateMsg.State) may differ from the cached state in memory (appState.Status.State). - * - * Purpose: - * Even if the progress (Progress) has not changed and there is no entrance information (EntranceStatuses), - * as long as the following conditions are met, we must bypass the "deduplication check" in the default branch. - * This forces the local state to update and pushes the change to the frontend, ensuring the UI promptly reflects the final result. 
- */ - switch { - // NATS State APP State - case appStateMsg.State == "running" && appState.Status.State == "installing": - case appStateMsg.State == "running" && appState.Status.State == "initializing": - case appStateMsg.State == "uninstalled" && appState.Status.State == "running": - case appStateMsg.State == "uninstalled" && appState.Status.State == "stopped": - case appStateMsg.State == "uninstalled" && appState.Status.State == "uninstalling": - case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCanceling": - case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCancelFailed": - case appStateMsg.State == "stopped" && appState.Status.State == "pending": - case appStateMsg.State == "pendingCanceled" && appState.Status.State == "pending": - case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "downloadingCanceling": - case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "pending": - case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": - case appStateMsg.State == "installingCanceled" && appState.Status.State == "installingCanceling": - case appStateMsg.State == "running" && appState.Status.State == "resuming": - case appStateMsg.State == "stopped" && appState.Status.State == "resuming": - case appStateMsg.State == "installingCanceled" && appState.Status.State == "resuming": - case appStateMsg.State == "stopped" && appState.Status.State == "stopping": - default: - if len(appStateMsg.EntranceStatuses) == 0 && appState.Status.Progress == appStateMsg.Progress { - glog.V(2).Infof("App state message is the same as the cached app state message for app %s, user %s, source %s, appState: %s, msgState: %s", - appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, appState.Status.State, appStateMsg.State) - return - } - } + shouldUpdate := true + checker := func(appState *AppStateLatestData) { + switch { + // NATS State APP State + case 
appStateMsg.State == "running" && appState.Status.State == "installing": + case appStateMsg.State == "running" && appState.Status.State == "initializing": + case appStateMsg.State == "uninstalled" && appState.Status.State == "running": + case appStateMsg.State == "uninstalled" && appState.Status.State == "stopped": + case appStateMsg.State == "uninstalled" && appState.Status.State == "uninstalling": + case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCanceling": + case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCancelFailed": + case appStateMsg.State == "pendingCanceled" && appState.Status.State == "pending": + case appStateMsg.State == "stopped" && appState.Status.State == "pending": + case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "downloadingCanceling": + case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "pending": + case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": + case appStateMsg.State == "installingCanceled" && appState.Status.State == "installingCanceling": + case appStateMsg.State == "running" && appState.Status.State == "resuming": + case appStateMsg.State == "stopped" && appState.Status.State == "resuming": + case appStateMsg.State == "installingCanceled" && appState.Status.State == "resuming": + case appStateMsg.State == "stopped" && appState.Status.State == "stopping": + default: + // state = downloading + if len(appStateMsg.EntranceStatuses) == 0 && appState.Status.Progress == appStateMsg.Progress { + glog.V(2).Infof("App state message is the same as the cached app state message for app %s, user %s, source %s, appState: %s, msgState: %s", + appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, appState.Status.State, appStateMsg.State) + shouldUpdate = false + return + } + } - // Compare timestamps properly by parsing them - if appState.Status.StatusTime != "" && appStateMsg.CreateTime != "" { - 
statusTime, err1 := time.Parse("2006-01-02T15:04:05.000000000Z", appState.Status.StatusTime) - createTime, err2 := time.Parse("2006-01-02T15:04:05.000000000Z", appStateMsg.CreateTime) + // Compare timestamps properly by parsing them + if appState.Status.StatusTime != "" && appStateMsg.CreateTime != "" { + statusTime, err1 := time.Parse(nanoTimeLayout, appState.Status.StatusTime) + createTime, err2 := time.Parse(nanoTimeLayout, appStateMsg.CreateTime) - if err1 == nil && err2 == nil { - if statusTime.After(createTime) { - glog.V(2).Infof("Cached app state is newer than incoming message for app %s, user %s, source %s, appTime: %s, msgTime: %s. Skipping update.", - appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, statusTime.String(), createTime.String()) - return - } - } else { - glog.Errorf("Failed to parse timestamps for comparison: StatusTime=%s, CreateTime=%s, err1=%v, err2=%v", - appState.Status.StatusTime, appStateMsg.CreateTime, err1, err2) - } + if err1 == nil && err2 == nil { + if statusTime.After(createTime) { + glog.V(2).Infof("Cached app state is newer than incoming message for app %s, user %s, source %s, appTime: %s, msgTime: %s. 
Skipping update.", + appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, statusTime.String(), createTime.String()) + shouldUpdate = false + return } + } else { + glog.Errorf("Failed to parse timestamps for comparison: StatusTime=%s, CreateTime=%s, err1=%v, err2=%v", + appState.Status.StatusTime, appStateMsg.CreateTime, err1, err2) } } } + dw.cacheManager.CompareAppStateMsg(appStateMsg.User, appStateMsg.MarketSource, appStateMsg.Name, checker) + + if !shouldUpdate { + return + } + + // userData := dw.cacheManager.getUserData(appStateMsg.User) + // if userData == nil { + // glog.V(2).Infof("User data not found for user %s", appStateMsg.User) + // return + // } + + // for sourceId, sourceData := range userData.Sources { + // if appStateMsg.MarketSource != "" && sourceId != appStateMsg.MarketSource { + // continue + // } + // for _, appState := range sourceData.AppStateLatest { + // if appState.Status.Name == appStateMsg.Name { // && appState.Status.State == appStateMsg.State + + // /** + // * [Mandatory Sync Whitelist] + // * The cases below define critical state transition scenarios that must be processed. + // * + // * Background: + // * When a user performs an action in the UI (e.g., canceling installation/download) or when an app lifecycle event completes (e.g., installation finished, uninstallation finished), + // * the final state pushed by NATS (appStateMsg.State) may differ from the cached state in memory (appState.Status.State). + // * + // * Purpose: + // * Even if the progress (Progress) has not changed and there is no entrance information (EntranceStatuses), + // * as long as the following conditions are met, we must bypass the "deduplication check" in the default branch. + // * This forces the local state to update and pushes the change to the frontend, ensuring the UI promptly reflects the final result. 
+ // */ + // switch { + // // NATS State APP State + // case appStateMsg.State == "running" && appState.Status.State == "installing": + // case appStateMsg.State == "running" && appState.Status.State == "initializing": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "running": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "stopped": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "uninstalling": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCanceling": + // case appStateMsg.State == "uninstalled" && appState.Status.State == "installingCancelFailed": + // case appStateMsg.State == "pendingCanceled" && appState.Status.State == "pending": + // case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "downloadingCanceling": + // case appStateMsg.State == "downloadingCanceled" && appState.Status.State == "pending": + // case appStateMsg.State == "installingCanceled" && appState.Status.State == "installing": + // case appStateMsg.State == "installingCanceled" && appState.Status.State == "installingCanceling": + // default: + // if len(appStateMsg.EntranceStatuses) == 0 && appState.Status.Progress == appStateMsg.Progress { + // glog.V(2).Infof("App state message is the same as the cached app state message for app %s, user %s, source %s, appState: %s, msgState: %s", + // appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, appState.Status.State, appStateMsg.State) + // return + // } + // } + + // // Compare timestamps properly by parsing them + // if appState.Status.StatusTime != "" && appStateMsg.CreateTime != "" { + // statusTime, err1 := time.Parse("2006-01-02T15:04:05.000000000Z", appState.Status.StatusTime) + // createTime, err2 := time.Parse("2006-01-02T15:04:05.000000000Z", appStateMsg.CreateTime) + + // if err1 == nil && err2 == nil { + // if statusTime.After(createTime) { + // glog.V(2).Infof("Cached app state is newer than 
incoming message for app %s, user %s, source %s, appTime: %s, msgTime: %s. Skipping update.", + // appStateMsg.Name, appStateMsg.User, appStateMsg.OpID, statusTime.String(), createTime.String()) + // return + // } + // } else { + // glog.Errorf("Failed to parse timestamps for comparison: StatusTime=%s, CreateTime=%s, err1=%v, err2=%v", + // appState.Status.StatusTime, appStateMsg.CreateTime, err1, err2) + // } + // } + // } + // } + // } + // Process the message - glog.V(2).Infof("State - Processs message from NATS subject %s, for internal for opID: %s, app: %s, user: %s, msgState: %s", + glog.V(2).Infof("State - Processs update message from NATS subject %s, for internal for opID: %s, app: %s, user: %s, msgState: %s", msg.Subject, appStateMsg.OpID, appStateMsg.Name, appStateMsg.User, appStateMsg.State) dw.processMessageInternal(msg, appStateMsg) } @@ -855,7 +903,8 @@ func (dw *DataWatcherState) storeStateToCache(msg AppStateMessage) { // Parse statusTime for sorting var statusTime time.Time if appState.Status.StatusTime != "" { - if parsedTime, err := time.Parse("2006-01-02T15:04:05.000000000Z", appState.Status.StatusTime); err == nil { + + if parsedTime, err := time.Parse(nanoTimeLayout, appState.Status.StatusTime); err == nil { statusTime = parsedTime } else { // If parsing fails, use zero time (will be sorted to the end) @@ -1001,22 +1050,15 @@ func (dw *DataWatcherState) storeStateToCache(msg AppStateMessage) { return } - if err := dw.cacheManager.SetAppData(userID, sourceID, AppStateLatest, stateData); err != nil { // + App - Sending + if err := dw.cacheManager.SetAppData(userID, sourceID, AppStateLatest, stateData, "DataWatcherState"); err != nil { // + App - Sending glog.Errorf("Failed to store app state to cache: %v", err) } else { glog.V(2).Infof("Successfully stored app state to cache for user=%s, source=%s, app=%s, state=%s", userID, sourceID, msg.Name, msg.State) - // Call ForceCalculateAllUsersHash for hash calculation after successful cache update + 
// Mark user as dirty for deferred hash calculation in Pipeline Phase 5 if dw.dataWatcher != nil { - glog.V(3).Infof("Triggering hash recalculation for all users after cache update") - if err := dw.dataWatcher.ForceCalculateAllUsersHash(); err != nil { - glog.Errorf("Failed to force calculate all users hash: %v", err) - } else { - glog.V(2).Infof("Successfully triggered hash recalculation for all users") - } - } else { - glog.V(3).Infof("DataWatcher not available, skipping hash recalculation") + dw.dataWatcher.MarkUserDirty(userID) } } } diff --git a/internal/v2/appinfo/datawatcher_user.go b/internal/v2/appinfo/datawatcher_user.go index 61f1b3c..fc40aaa 100644 --- a/internal/v2/appinfo/datawatcher_user.go +++ b/internal/v2/appinfo/datawatcher_user.go @@ -177,7 +177,7 @@ func (dw *DataWatcherUser) subscribeToMessages() error { // processMessage processes incoming NATS messages func (dw *DataWatcherUser) processMessage(data []byte) { - glog.V(2).Infof("User - Received message from NATS subject %s: %s", string(data)) + glog.V(2).Infof("User - Received message from NATS subject %s", string(data)) var message UserStateMessage if err := json.Unmarshal(data, &message); err != nil { @@ -186,7 +186,7 @@ func (dw *DataWatcherUser) processMessage(data []byte) { } // Print the received message - glog.V(3).Infof("Received app state message - EventType: %s, Username: %s, Timestamp: %s", + glog.V(2).Infof("User - Received watcher user message - EventType: %s, Username: %s, Timestamp: %s", message.EventType, message.Username, message.Timestamp) // Write to history diff --git a/internal/v2/appinfo/db.go b/internal/v2/appinfo/db.go index 63a0107..453e4e9 100644 --- a/internal/v2/appinfo/db.go +++ b/internal/v2/appinfo/db.go @@ -159,6 +159,10 @@ func (r *RedisClient) LoadCacheFromRedis() (*CacheData, error) { func (r *RedisClient) loadUserData(userID string) (*UserData, error) { userData := NewUserDataEx(userID) // NewUserData() + if userData.UserInfo == nil { + return nil, 
fmt.Errorf("User %s not exists in cluster", userID) + } + // Load user hash from Redis userHashKey := fmt.Sprintf("appinfo:user:%s:hash", userID) hashValue, err := r.client.Get(r.ctx, userHashKey).Result() @@ -166,7 +170,7 @@ func (r *RedisClient) loadUserData(userID string) (*UserData, error) { userData.Hash = hashValue glog.Infof("Loaded user hash from Redis: user=%s, hash=%s", userID, hashValue) } else if err != redis.Nil { - glog.Warningf("Failed to load user hash from Redis: user=%s, error=%v", userID, err) + glog.Errorf("Failed to load user hash from Redis: user=%s, error=%v", userID, err) } // Get all source keys for this user diff --git a/internal/v2/appinfo/diagnostic.go b/internal/v2/appinfo/diagnostic.go index 476e968..66f0000 100644 --- a/internal/v2/appinfo/diagnostic.go +++ b/internal/v2/appinfo/diagnostic.go @@ -45,10 +45,7 @@ func (cm *CacheManager) DiagnoseCacheAndRedis() error { glog.Infof("Redis Keys Found: %d", len(redisKeys)) // Analyze cache state - if !cm.mutex.TryRLock() { - glog.Warningf("Diagnostic: CacheManager read lock not available, skipping cache analysis") - return fmt.Errorf("read lock not available") - } + cm.mutex.RLock() userCount := len(cm.cache.Users) totalSources := 0 issues := 0 @@ -114,8 +111,8 @@ func (cm *CacheManager) GetDiagnosticJSON() (string, error) { } // Get cache stats and users data for JSON response - cacheStats := cm.GetCacheStats() - allUsersData := cm.GetAllUsersData() + cacheStats := cm.GetCacheStats() // not used + allUsersData := cm.GetAllUsersData() // not used diagnosticInfo := map[string]interface{}{ "cache_stats": cacheStats, @@ -143,10 +140,7 @@ func (cm *CacheManager) ForceReloadFromRedis() error { return err } - if !cm.mutex.TryLock() { - glog.Warningf("Diagnostic: Write lock not available for cache reload, skipping") - return fmt.Errorf("write lock not available") - } + cm.mutex.Lock() cm.cache = cache cm.mutex.Unlock() @@ -156,10 +150,7 @@ func (cm *CacheManager) ForceReloadFromRedis() error { // 
ValidateSourceData validates source data integrity func (cm *CacheManager) ValidateSourceData(userID, sourceID string) (*SourceAnalysis, error) { - if !cm.mutex.TryRLock() { - glog.Warningf("Diagnostic.ValidateSourceData: CacheManager read lock not available for user %s, source %s", userID, sourceID) - return nil, fmt.Errorf("read lock not available") - } + cm.mutex.RLock() defer cm.mutex.RUnlock() userData, exists := cm.cache.Users[userID] diff --git a/internal/v2/appinfo/hydration.go b/internal/v2/appinfo/hydration.go index ea6b0a3..93c5a3d 100644 --- a/internal/v2/appinfo/hydration.go +++ b/internal/v2/appinfo/hydration.go @@ -2,7 +2,6 @@ package appinfo import ( "context" - "encoding/json" "fmt" "os" "reflect" @@ -162,27 +161,17 @@ func (h *Hydrator) AddStep(step hydrationfn.HydrationStep) { h.steps = append(h.steps, step) } -// Start begins the hydration process with workers +// Start begins the hydration process in passive mode (Pipeline handles scheduling) func (h *Hydrator) Start(ctx context.Context) error { if h.isRunning.Load() { return fmt.Errorf("hydrator is already running") } h.isRunning.Store(true) - glog.V(3).Infof("Starting hydrator with %d workers and %d steps", h.workerCount, len(h.steps)) + glog.V(3).Infof("Starting hydrator with %d steps (passive mode, Pipeline handles scheduling)", len(h.steps)) - // Start worker goroutines - for i := 0; i < h.workerCount; i++ { - go h.worker(ctx, i) - } - - // Start pending data monitor - go h.pendingDataMonitor(ctx) - - // Start batch completion processor go h.batchCompletionProcessor(ctx) - // Start database sync monitor if cache manager is available if h.cacheManager != nil { go h.databaseSyncMonitor(ctx) } @@ -206,434 +195,17 @@ func (h *Hydrator) IsRunning() bool { return h.isRunning.Load() } -// EnqueueTask adds a task to the hydration queue -func (h *Hydrator) EnqueueTask(task *hydrationfn.HydrationTask) error { - if !h.IsRunning() { - return fmt.Errorf("hydrator is not running") - } - - select { - 
case h.taskQueue <- task: - h.trackTask(task) - glog.V(4).Infof("Enqueued hydration task: %s for app: %s (user: %s, source: %s) - Queue length: %d", - task.ID, task.AppID, task.UserID, task.SourceID, len(h.taskQueue)) - return nil - default: - glog.Errorf("ERROR: Task queue is full! Cannot enqueue task: %s for app: %s (user: %s, source: %s) - Queue length: %d", - task.ID, task.AppID, task.UserID, task.SourceID, len(h.taskQueue)) - return fmt.Errorf("task queue is full") - } -} - -// worker processes tasks from the queue -func (h *Hydrator) worker(ctx context.Context, workerID int) { - glog.V(3).Infof("Hydration worker %d started", workerID) - - // Initialize worker status - h.updateWorkerStatus(workerID, nil, true) - - defer func() { - // Mark worker as idle when stopping - h.updateWorkerStatus(workerID, nil, true) - glog.V(4).Infof("Hydration worker %d stopped", workerID) - }() - - for { - select { - case <-ctx.Done(): - return - case <-h.stopChan: - return - case task := <-h.taskQueue: - if task != nil { - glog.V(3).Infof("DEBUG: Worker %d received task from queue: %s for app: %s (user: %s, source: %s)", workerID, task.ID, task.AppID, task.UserID, task.SourceID) - h.processTask(ctx, task, workerID) - } - } - } -} - -// processTask processes a single hydration task -func (h *Hydrator) processTask(ctx context.Context, task *hydrationfn.HydrationTask, workerID int) { - // Add memory monitoring at the start of task processing - h.monitorMemoryUsage() - - glog.V(3).Info("==================== HYDRATION TASK STARTED ====================") - glog.V(3).Infof("Worker %d processing task: %s for app: %s", workerID, task.ID, task.AppID) - - // Update worker status to indicate it's processing this task - h.updateWorkerStatus(workerID, task, false) - taskStartTime := time.Now() - - // Ensure worker status is cleared when task completes or fails - defer h.updateWorkerStatus(workerID, nil, true) - - // Check if task is in cooldown period - if task.LastFailureTime != nil && 
time.Since(*task.LastFailureTime) < 5*time.Minute { - glog.V(4).Infof("Task %s is in cooldown period, skipping. Next retry available at: %v", - task.ID, task.LastFailureTime.Add(5*time.Minute)) - return - } - - task.SetStatus(hydrationfn.TaskStatusRunning) - - // Execute all steps - for i, step := range h.steps { - if task.CurrentStep > i { - continue // Skip already completed steps - } - - // Check if step can be skipped - if step.CanSkip(ctx, task) { - glog.V(3).Infof("Skipping step %d (%s) for task: %s", i+1, step.GetStepName(), task.ID) - glog.V(3).Infof("-------- HYDRATION STEP %d/%d SKIPPED: %s --------", i+1, len(h.steps), step.GetStepName()) - task.IncrementStep() - continue - } - - glog.V(3).Infof("-------- HYDRATION STEP %d/%d STARTED: %s --------", i+1, len(h.steps), step.GetStepName()) - glog.V(3).Infof("Executing step %d (%s) for task: %s", i+1, step.GetStepName(), task.ID) - - // Update worker status with current step - h.updateWorkerStatus(workerID, task, false) - - // Log task data before step execution - h.logTaskDataBeforeStep(task, i+1, step.GetStepName()) - - // Execute step - if err := step.Execute(ctx, task); err != nil { - glog.Errorf("Step %d (%s) failed for task: %s, app: %s %s %s, error: %v", i+1, step.GetStepName(), task.ID, task.AppID, task.AppName, task.AppVersion, err) - glog.Errorf("-------- HYDRATION STEP %d/%d FAILED: %s --------", i+1, len(h.steps), step.GetStepName()) - task.SetError(err) - - // Clean up resources before failure - h.cleanupTaskResources(task) - - // Set failure time - now := time.Now() - task.LastFailureTime = &now - - // Comment out retry logic - instead move to render failed list - /* - // Check if task can be retried - if task.CanRetry() { - glog.Infof("Task %s failed, will retry after cooldown period (5 minutes). 
Next retry available at: %v", - task.ID, task.LastFailureTime.Add(5*time.Minute)) - task.ResetForRetry() - - // Re-enqueue for retry after cooldown - go func() { - time.Sleep(5 * time.Minute) // Wait for cooldown period - if err := h.EnqueueTask(task); err != nil { - glog.Errorf("Failed to re-enqueue task for retry: %s, error: %v", task.ID, err) - h.markTaskFailed(task, time.Now(), 0, "retry", err.Error()) - } - }() - glog.V(3).Infof("==================== HYDRATION TASK QUEUED FOR RETRY AFTER COOLDOWN ====================") - return - } else { - // Max retries exceeded - glog.V(3).Infof("Task failed after max retries: %s", task.ID) - h.markTaskFailed(task, time.Now(), 0, "max_retries", "max retries exceeded") - glog.V(3).Infof("==================== HYDRATION TASK FAILED ====================") - return - } - */ - - // Move failed task to render failed list instead of retrying - failureReason := err.Error() - failureStep := step.GetStepName() - - glog.Errorf("Task %s failed at step %s, moving to render failed list with reason: %s", - task.ID, failureStep, failureReason) - - duration := time.Since(taskStartTime) - h.moveTaskToRenderFailed(task, failureReason, failureStep) - h.markTaskFailed(task, taskStartTime, duration, failureStep, failureReason) - - glog.Errorf("==================== HYDRATION TASK MOVED TO RENDER FAILED LIST ====================") - return - } - - // Log task data after step execution - h.logTaskDataAfterStep(task, i+1, step.GetStepName()) - - task.IncrementStep() - glog.V(3).Infof("Step %d (%s) completed for task: %s", i+1, step.GetStepName(), task.ID) - glog.V(4).Infof("-------- HYDRATION STEP %d/%d COMPLETED: %s --------", i+1, len(h.steps), step.GetStepName()) - } - - // All steps completed successfully - task.SetStatus(hydrationfn.TaskStatusCompleted) - duration := time.Since(taskStartTime) - h.markTaskCompleted(task, taskStartTime, duration) - - glog.V(3).Infof("Task completed successfully: %s for app: %s", task.ID, task.AppID) - 
glog.V(4).Infof("==================== HYDRATION TASK COMPLETED ====================") - glog.V(4).Infoln("") -} - -// updateWorkerStatus updates the status of a worker -func (h *Hydrator) updateWorkerStatus(workerID int, task *hydrationfn.HydrationTask, isIdle bool) { - if !h.workerStatusMutex.TryLock() { - return // Skip if can't acquire lock - } - defer h.workerStatusMutex.Unlock() - - if isIdle { - delete(h.workerStatus, workerID) - return - } - - // Worker is processing a task - var taskInfo *TaskInfo - if task != nil { - taskInfo = h.taskToTaskInfo(task) - } - - h.workerStatus[workerID] = &WorkerStatus{ - WorkerID: workerID, - IsIdle: false, - CurrentTask: taskInfo, - LastActivity: time.Now(), - } -} - -// logTaskDataBeforeStep logs task data before step execution to help debug JSON cycle issues -func (h *Hydrator) logTaskDataBeforeStep(task *hydrationfn.HydrationTask, stepNum int, stepName string) { - glog.V(3).Infof("DEBUG: Before step %d (%s) - Task data structure check", stepNum, stepName) - - // Try to JSON marshal task.ChartData - if len(task.ChartData) > 0 { - if jsonData, err := json.Marshal(task.ChartData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.ChartData before step %d: %v, ChartData keys: %v", stepNum, err, h.getMapKeys(task.ChartData)) - } else { - glog.V(3).Infof("DEBUG: task.ChartData JSON length before step %d: %d bytes", stepNum, len(jsonData)) - } - } - - // Try to JSON marshal task.AppData - if len(task.AppData) > 0 { - if jsonData, err := json.Marshal(task.AppData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.AppData before step %d: %v, AppData keys: %v", stepNum, err, h.getMapKeys(task.AppData)) - } else { - glog.V(3).Infof("DEBUG: task.AppData JSON length before step %d: %d bytes", stepNum, len(jsonData)) - } - } - - // Try to JSON marshal task.DatabaseUpdateData - if len(task.DatabaseUpdateData) > 0 { - if jsonData, err := json.Marshal(task.DatabaseUpdateData); err != nil { - 
glog.Errorf("ERROR: JSON marshal failed for task.DatabaseUpdateData before step %d: %v, DatabaseUpdateData keys: %v", stepNum, err, h.getMapKeys(task.DatabaseUpdateData)) - } else { - glog.V(3).Infof("DEBUG: task.DatabaseUpdateData JSON length before step %d: %d bytes", stepNum, len(jsonData)) - } - } -} - -// logTaskDataAfterStep logs task data after step execution to help debug JSON cycle issues -func (h *Hydrator) logTaskDataAfterStep(task *hydrationfn.HydrationTask, stepNum int, stepName string) { - glog.V(3).Infof("DEBUG: After step %d (%s) - Task data structure check", stepNum, stepName) - - // Try to JSON marshal task.ChartData - if len(task.ChartData) > 0 { - if jsonData, err := json.Marshal(task.ChartData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.ChartData after step %d: %v, ChartData keys: %v", stepNum, err, h.getMapKeys(task.ChartData)) - } else { - glog.V(3).Infof("DEBUG: task.ChartData JSON length after step %d: %d bytes", stepNum, len(jsonData)) - } - } - - // Try to JSON marshal task.AppData - if len(task.AppData) > 0 { - if jsonData, err := json.Marshal(task.AppData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.AppData after step %d: %v, AppData keys: %v", stepNum, err, h.getMapKeys(task.AppData)) - } else { - glog.V(3).Infof("DEBUG: task.AppData JSON length after step %d: %d bytes", stepNum, len(jsonData)) - } - } - - // Try to JSON marshal task.DatabaseUpdateData - if len(task.DatabaseUpdateData) > 0 { - if jsonData, err := json.Marshal(task.DatabaseUpdateData); err != nil { - glog.Errorf("ERROR: JSON marshal failed for task.DatabaseUpdateData after step %d: %v, DatabaseUpdateData keys: %v", stepNum, err, h.getMapKeys(task.DatabaseUpdateData)) - } else { - glog.V(3).Infof("DEBUG: task.DatabaseUpdateData JSON length after step %d: %d bytes", stepNum, len(jsonData)) - } - } -} - -// getMapKeys safely extracts keys from a map for debugging -func (h *Hydrator) getMapKeys(data map[string]interface{}) []string 
{ - keys := make([]string, 0, len(data)) - for key := range data { - keys = append(keys, key) - } - return keys -} - -// cleanupTaskResources cleans up resources associated with a task func (h *Hydrator) cleanupTaskResources(task *hydrationfn.HydrationTask) { - // Clean up chart data - // if renderedDir, exists := task.ChartData["rendered_chart_dir"].(string); exists { - // if err := os.RemoveAll(renderedDir); err != nil { - // glog.Info("Warning: Failed to clean up rendered chart directory %s: %v", renderedDir, err) - // } - // } - - // Clean up source chart if sourceChartPath, exists := task.ChartData["source_chart_path"].(string); exists { if err := os.Remove(sourceChartPath); err != nil { glog.Errorf("Warning: Failed to clean up source chart file %s: %v", sourceChartPath, err) } } - - // Clear task data maps task.ChartData = make(map[string]interface{}) task.DatabaseUpdateData = make(map[string]interface{}) - - // Clear app data to reduce memory usage task.AppData = make(map[string]interface{}) } -// pendingDataMonitor monitors for new pending data and creates tasks -func (h *Hydrator) pendingDataMonitor(ctx context.Context) { - glog.V(3).Infoln("Pending data monitor started") - defer glog.V(3).Infoln("Pending data monitor stopped") - - ticker := time.NewTicker(time.Second * 30) // Check every 30 seconds - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-h.stopChan: - return - case <-ticker.C: - h.checkForPendingData() - } - } -} - -// checkForPendingData scans cache for pending data and creates hydration tasks -func (h *Hydrator) checkForPendingData() { - // Use CacheManager's lock if available - if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warning("[TryRLock] Hydrator.checkForPendingData: CacheManager read lock not available, skipping") - return - } - defer h.cacheManager.mutex.RUnlock() - - for userID, userData := range h.cache.Users { - // No nested locks needed since we already hold the global 
lock - for sourceID, sourceData := range userData.Sources { - // No nested locks needed since we already hold the global lock - - // Log source type for debugging - both local and remote should be processed - if len(sourceData.AppInfoLatestPending) > 0 { - glog.V(3).Infof("Checking pending data for user: %s, source: %s, type: %s, pending: %d", userID, sourceID, sourceData.Type, len(sourceData.AppInfoLatestPending)) - } - - // Check if there's pending data - process both local and remote sources - if len(sourceData.AppInfoLatestPending) > 0 { - glog.V(3).Infof("Found %d pending apps for user: %s, source: %s, type: %s", - len(sourceData.AppInfoLatestPending), userID, sourceID, sourceData.Type) - glog.V(3).Infof("DEBUG: About to process %d pending apps for user: %s, source: %s", len(sourceData.AppInfoLatestPending), userID, sourceID) - for i, pendingData := range sourceData.AppInfoLatestPending { - glog.V(3).Infof("DEBUG: Processing pending data %d/%d for user: %s, source: %s, pendingData: %v", i+1, len(sourceData.AppInfoLatestPending), userID, sourceID, pendingData != nil) - h.createTasksFromPendingData(userID, sourceID, pendingData) - } - } - } - } - } else { - glog.V(3).Infof("Warning: CacheManager not available for checkForPendingData") - } -} - -// createTasksFromPendingData creates hydration tasks from pending app data -func (h *Hydrator) createTasksFromPendingData(userID, sourceID string, pendingData *types.AppInfoLatestPendingData) { - if pendingData == nil { - glog.V(3).Infof("DEBUG: createTasksFromPendingData called with nil pendingData for user: %s, source: %s", userID, sourceID) - return - } - - glog.V(3).Infof("DEBUG: createTasksFromPendingData called for user: %s, source: %s, pendingData.RawData: %v", userID, sourceID, pendingData.RawData != nil) - - // For the new structure, we can work with RawData if it exists - if pendingData.RawData != nil { - // Handle regular structured RawData - appName := pendingData.RawData.Name - appID := 
pendingData.RawData.AppID - if appID == "" { - appID = pendingData.RawData.ID - } - - glog.V(3).Infof("DEBUG: Processing appID: %s %s for user: %s, source: %s", appID, appName, userID, sourceID) - - if appID != "" { - // Check if app is already in render failed list - if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - glog.V(3).Infof("App %s (user: %s, source: %s) is already in render failed list, skipping task creation", - appID, userID, sourceID) - return - } - - // Check if app hydration is already complete before creating new task - if h.isAppHydrationComplete(pendingData) { - glog.V(3).Infof("DEBUG: App hydration already complete for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - return - } - - // Check if app already exists in latest queue before creating new task - // Extract version from pending data for version comparison - version := "" - if pendingData.RawData != nil { - version = pendingData.RawData.Version - } - if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { - glog.V(3).Infof("DEBUG: App already exists in latest queue for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - return - } - - if !h.hasActiveTaskForApp(userID, sourceID, appID, appName) { - glog.V(3).Infof("DEBUG: No active task found for app: %s (user: %s, source: %s), proceeding with task creation", appID, userID, sourceID) - // Convert ApplicationInfoEntry to map for task creation - appDataMap := h.convertApplicationInfoEntryToMap(pendingData.RawData) - - if len(appDataMap) == 0 { - glog.V(3).Infof("Warning: Empty app data for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - return - } - - // Create task with CacheManager for unified lock strategy - var cacheManager types.CacheManagerInterface - if h.cacheManager != nil { - cacheManager = h.cacheManager - } - task := hydrationfn.NewHydrationTaskWithManager( - userID, sourceID, appID, - 
appDataMap, h.cache, cacheManager, h.settingsManager, - ) - - if err := h.EnqueueTask(task); err != nil { - glog.Errorf("Failed to enqueue task for app: %s (user: %s, source: %s), error: %v", - appID, userID, sourceID, err) - } else { - glog.V(3).Infof("Created hydration task for structured app: %s (user: %s, source: %s)", - appID, userID, sourceID) - } - } - } - return - } -} - // isAppHydrationComplete checks if an app has completed all hydration steps func (h *Hydrator) isAppHydrationComplete(pendingData *types.AppInfoLatestPendingData) bool { @@ -656,77 +228,41 @@ func (h *Hydrator) isAppHydrationComplete(pendingData *types.AppInfoLatestPendin appName = pendingData.RawData.Name } - glog.V(3).Infof("DEBUG: isAppHydrationComplete checking appID=%s, name=%s, RawPackage=%s, RenderedPackage=%s", + glog.V(3).Infof("DEBUG: isAppHydrationComplete checking appID=%s(%s), RawPackage=%s, RenderedPackage=%s", appID, appName, pendingData.RawPackage, pendingData.RenderedPackage) if pendingData.RawPackage == "" { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - RawPackage is empty for appID=%s, name=%s", appID, appName) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - RawPackage is empty for appID=%s(%s)", appID, appName) return false } if pendingData.RenderedPackage == "" { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - RenderedPackage is empty for appID=%s, name=%s", appID, appName) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - RenderedPackage is empty for appID=%s(%s)", appID, appName) return false } if pendingData.AppInfo == nil { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - AppInfo is nil for appID=%s, name=%s", appID, appName) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - AppInfo is nil for appID=%s(%s)", appID, appName) return false } imageAnalysis := pendingData.AppInfo.ImageAnalysis if imageAnalysis == nil { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - 
ImageAnalysis is nil for appID=%s, name=%s", appID, appName) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis is nil for appID=%s(%s)", appID, appName) return false } if imageAnalysis.TotalImages > 0 { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING TRUE - TotalImages > 0 for appID=%s, name=%s, TotalImages: %d", appID, appName, imageAnalysis.TotalImages) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING TRUE - TotalImages > 0 for appID=%s(%s), TotalImages: %d", appID, appName, imageAnalysis.TotalImages) return true } if imageAnalysis.TotalImages == 0 && imageAnalysis.Images != nil { - glog.Infof("DEBUG: isAppHydrationComplete RETURNING TRUE - TotalImages=0 but Images not nil for appID=%s, name=%s, Images: %v", appID, appName, imageAnalysis.Images) + glog.Infof("DEBUG: isAppHydrationComplete RETURNING TRUE - TotalImages=0 but Images not nil for appID=%s(%s), Images: %v", appID, appName, imageAnalysis.Images) return true } - glog.V(2).Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis incomplete for appID=%s, name=%s, TotalImages: %d, Images: %v", appID, appName, imageAnalysis.TotalImages, imageAnalysis.Images) - return false -} - -// isAppDataHydrationComplete checks if an app's hydration is complete by looking up pending data in cache -func (h *Hydrator) isAppDataHydrationComplete(userID, sourceID, appID string) bool { - // Use CacheManager's lock if available - if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRlock] Hydrator.isAppDataHydrationComplete: CacheManager read lock not available for user %s, source %s, app %s, returning false", userID, sourceID, appID) - return false - } - defer h.cacheManager.mutex.RUnlock() - - userData, userExists := h.cache.Users[userID] - if !userExists { - return false - } - - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - return false - } - - // Find the pending data for the specific app - for _, pendingData 
:= range sourceData.AppInfoLatestPending { - if pendingData.RawData != nil && - (pendingData.RawData.ID == appID || pendingData.RawData.AppID == appID || pendingData.RawData.Name == appID) { - // Found the pending data for this app, check if hydration is complete - return h.isAppHydrationComplete(pendingData) - } - } - } else { - glog.V(3).Infof("Warning: CacheManager not available for isAppDataHydrationComplete") - } - - // If no pending data found for this app, consider it not hydrated + glog.V(2).Infof("DEBUG: isAppHydrationComplete RETURNING FALSE - ImageAnalysis incomplete for appID=%s(%s), TotalImages: %d, Images: %v", appID, appName, imageAnalysis.TotalImages, imageAnalysis.Images) return false } @@ -891,38 +427,6 @@ func (h *Hydrator) deepCopyValue(value interface{}, visited map[uintptr]bool) in } } -// hasActiveTaskForApp checks if there's already an active task for the given app -func (h *Hydrator) hasActiveTaskForApp(userID, sourceID, appID, appName string) bool { - if !h.taskMutex.TryRLock() { - glog.Warningf("[TryRLock] Failed to acquire read lock for hasActiveTaskForApp, returning false, user: %s, source: %s, id: %s, name: %s", userID, sourceID, appID, appName) - return false - } - defer h.taskMutex.RUnlock() - - if len(h.activeTasks) > 0 { - glog.V(4).Infof("DEBUG: Checking active tasks for app: %s (user: %s, source: %s), total active tasks: %d", appID, userID, sourceID, len(h.activeTasks)) - } - - for _, task := range h.activeTasks { - if task.UserID == userID && task.SourceID == sourceID && task.AppID == appID { - glog.V(4).Infof("DEBUG: Found active task for app: %s (user: %s, source: %s), taskID: %s", appID, userID, sourceID, task.ID) - return true - } - } - glog.V(4).Infof("DEBUG: No active task found for app: %s (user: %s, source: %s)", appID, userID, sourceID) - return false -} - -// trackTask adds task to active tasks tracking -func (h *Hydrator) trackTask(task *hydrationfn.HydrationTask) { - if !h.taskMutex.TryLock() { - 
glog.Warningf("[TryLock] Failed to acquire lock for trackTask, skipping task tracking, task: %s, name: %s, version: %s", task.ID, task.AppName, task.AppVersion) - return - } - defer h.taskMutex.Unlock() - h.activeTasks[task.ID] = task -} - // markTaskCompleted moves task from active to completed func (h *Hydrator) markTaskCompleted(task *hydrationfn.HydrationTask, startedAt time.Time, duration time.Duration) { // Extract file path for cleanup before the lock @@ -931,10 +435,7 @@ func (h *Hydrator) markTaskCompleted(task *hydrationfn.HydrationTask, startedAt sourceChartPath = path } - if !h.taskMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for markTaskCompleted, skipping status update, task: %s, user: %s, source: %s, id: %s, name: %s, version: %s", task.ID, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) - return - } + h.taskMutex.Lock() delete(h.activeTasks, task.ID) // Clean up in-memory data under lock @@ -976,11 +477,7 @@ func (h *Hydrator) markTaskFailed(task *hydrationfn.HydrationTask, startedAt tim sourceChartPath = path } - if !h.taskMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for markTaskFailed, skipping status update, task: %s, user: %s, source: %s, id: %s, name: %s, version: %s, error: %s", task.ID, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion, errorMsg) - return - } - + h.taskMutex.Lock() task.SetStatus(hydrationfn.TaskStatusFailed) delete(h.activeTasks, task.ID) @@ -1025,10 +522,7 @@ func (h *Hydrator) markTaskFailed(task *hydrationfn.HydrationTask, startedAt tim // addToCompletedHistory adds a task to the completed tasks history func (h *Hydrator) addToCompletedHistory(task *hydrationfn.HydrationTask, startedAt time.Time, duration time.Duration) { - if !h.workerStatusMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for addToCompletedHistory, skipping, task: %s, user: %s, source: %s, id: %s, name: %s, version: %s", task.ID, task.UserID, 
task.SourceID, task.AppID, task.AppName, task.AppVersion) - return - } + h.workerStatusMutex.Lock() defer h.workerStatusMutex.Unlock() entry := &TaskHistoryEntry{ @@ -1052,10 +546,7 @@ func (h *Hydrator) addToCompletedHistory(task *hydrationfn.HydrationTask, starte // addToFailedHistory adds a task to the failed tasks history func (h *Hydrator) addToFailedHistory(task *hydrationfn.HydrationTask, startedAt time.Time, duration time.Duration, failedStep string, errorMsg string) { - if !h.workerStatusMutex.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for addToFailedHistory, skipping, task: %s, user: %s, source: %s, id: %s, name: %s, version: %s", task.ID, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) - return - } + h.workerStatusMutex.Lock() defer h.workerStatusMutex.Unlock() entry := &TaskHistoryEntry{ @@ -1086,40 +577,7 @@ func (h *Hydrator) moveTaskToRenderFailed(task *hydrationfn.HydrationTask, failu return } - // Find the pending data for this task - var pendingData *types.AppInfoLatestPendingData - if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Hydrator.moveTaskToRenderFailed: CacheManager read lock not available for user %s, skipping, source: %s, id: %s, name: %s, version: %s", task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) - return - } - userData, userExists := h.cache.Users[task.UserID] - if !userExists { - h.cacheManager.mutex.RUnlock() - glog.Warningf("Warning: User data not found for task: %s, user: %s", task.ID, task.UserID) - return - } - - sourceData, sourceExists := userData.Sources[task.SourceID] - if !sourceExists { - h.cacheManager.mutex.RUnlock() - glog.V(3).Infof("Warning: Source data not found for task: %s, user: %s, source: %s", task.ID, task.UserID, task.SourceID) - return - } - - // Find the pending data for this app - for _, pending := range sourceData.AppInfoLatestPending { - if pending.RawData != nil && - (pending.RawData.ID == 
task.AppID || pending.RawData.AppID == task.AppID || pending.RawData.Name == task.AppID) { - pendingData = pending - break - } - } - h.cacheManager.mutex.RUnlock() - } else { - glog.V(3).Infof("Warning: CacheManager not available for moveTaskToRenderFailed") - return - } + pendingData := h.cacheManager.FindPendingDataForApp(task.UserID, task.SourceID, task.AppID) if pendingData == nil { glog.V(3).Infof("Warning: Pending data not found for task: %s, app: %s", task.ID, task.AppID) @@ -1132,13 +590,13 @@ func (h *Hydrator) moveTaskToRenderFailed(task *hydrationfn.HydrationTask, failu // Add to render failed list in cache if err := h.cacheManager.SetAppData(task.UserID, task.SourceID, types.AppRenderFailed, map[string]interface{}{ "failed_app": failedData, - }); err != nil { + }, "Hydrator"); err != nil { glog.Errorf("Failed to add task to render failed list: %s, error: %v", task.ID, err) return } - glog.V(2).Infof("Successfully moved task %s (app: %s) to render failed list with reason: %s, step: %s", - task.ID, task.AppID, failureReason, failureStep) + glog.V(2).Infof("Successfully moved task %s (app: %s/%s/%s) to render failed list with reason: %s, step: %s", + task.ID, task.AppID, task.AppName, task.AppVersion, failureReason, failureStep) // Remove from pending list h.removeFromPendingList(task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion) @@ -1150,115 +608,35 @@ func (h *Hydrator) removeFromPendingList(userID, sourceID, appID, appName, appVe glog.V(3).Infof("Warning: CacheManager not available for removeFromPendingList") return } - - // 1) Read-lock phase: locate index to remove (no writes under RLock) - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Hydrator.removeFromPendingList: CacheManager read lock not available for user %s, source %s, app %s %s %s, skipping", userID, sourceID, appID, appName, appVersion) - return - } - userData, userExists := h.cache.Users[userID] - if !userExists { - h.cacheManager.mutex.RUnlock() - 
return - } - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - h.cacheManager.mutex.RUnlock() - return - } - removeIdx := -1 - for i, pending := range sourceData.AppInfoLatestPending { - if pending != nil && pending.RawData != nil && - (pending.RawData.ID == appID || pending.RawData.AppID == appID || pending.RawData.Name == appID) { - removeIdx = i - break - } - } - h.cacheManager.mutex.RUnlock() - - if removeIdx == -1 { - return - } - - // 2) Try to acquire short write-lock and apply removal with new slice; skip if contended - // Use TryLock to avoid blocking - if !h.cacheManager.mutex.TryLock() { - glog.Warningf("[TryLock] DEBUG: removeFromPendingList skipped (lock not available) for user=%s source=%s app=%s %s %s", userID, sourceID, appID, appName, appVersion) - return - } - defer h.cacheManager.mutex.Unlock() - - // Re-validate pointers under write-lock - if userData2, ok := h.cache.Users[userID]; ok { - if sourceData2, ok2 := userData2.Sources[sourceID]; ok2 { - if removeIdx >= 0 && removeIdx < len(sourceData2.AppInfoLatestPending) { - // Re-check match to be safe - p := sourceData2.AppInfoLatestPending[removeIdx] - if p != nil && p.RawData != nil && (p.RawData.ID == appID || p.RawData.AppID == appID || p.RawData.Name == appID) { - // Create new slice dropping index removeIdx - old := sourceData2.AppInfoLatestPending - newSlice := make([]*types.AppInfoLatestPendingData, 0, len(old)-1) - newSlice = append(newSlice, old[:removeIdx]...) - if removeIdx+1 <= len(old)-1 { - newSlice = append(newSlice, old[removeIdx+1:]...) 
- } - sourceData2.AppInfoLatestPending = newSlice - glog.V(2).Infof("Removed app %s from pending list for user: %s, source: %s", appID, userID, sourceID) - } - } - } - } + h.cacheManager.RemoveFromPendingList(userID, sourceID, appID) + glog.V(2).Infof("Removed app %s(%s) from pending list for user: %s, source: %s", appID, appName, userID, sourceID) } // GetMetrics returns hydrator metrics func (h *Hydrator) GetMetrics() HydratorMetrics { - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetMetrics, returning zero metrics") - // Try to get worker status even if we can't get task lock - var workers []*WorkerStatus - if h.workerStatusMutex.TryRLock() { - workers = h.getWorkerStatusList() - h.workerStatusMutex.RUnlock() - } - return HydratorMetrics{ - TotalTasksProcessed: h.totalTasksProcessed, - TotalTasksSucceeded: h.totalTasksSucceeded, - TotalTasksFailed: h.totalTasksFailed, - ActiveTasksCount: 0, - CompletedTasksCount: 0, - FailedTasksCount: 0, - QueueLength: int64(len(h.taskQueue)), - ActiveTasks: []*TaskInfo{}, - RecentCompletedTasks: h.getRecentCompletedTasks(), - RecentFailedTasks: h.getRecentFailedTasks(), - Workers: workers, - } - } - - // Get active tasks info + h.taskMutex.RLock() activeTasksList := make([]*TaskInfo, 0, len(h.activeTasks)) for _, task := range h.activeTasks { if task != nil { activeTasksList = append(activeTasksList, h.taskToTaskInfo(task)) } } - + activeCount := int64(len(h.activeTasks)) + completedCount := int64(len(h.completedTasks)) + failedCount := int64(len(h.failedTasks)) h.taskMutex.RUnlock() - // Get worker status - var workers []*WorkerStatus - if h.workerStatusMutex.TryRLock() { - workers = h.getWorkerStatusList() - h.workerStatusMutex.RUnlock() - } + h.workerStatusMutex.RLock() + workers := h.getWorkerStatusList() + h.workerStatusMutex.RUnlock() return HydratorMetrics{ TotalTasksProcessed: h.totalTasksProcessed, TotalTasksSucceeded: h.totalTasksSucceeded, TotalTasksFailed: 
h.totalTasksFailed, - ActiveTasksCount: int64(len(h.activeTasks)), - CompletedTasksCount: int64(len(h.completedTasks)), - FailedTasksCount: int64(len(h.failedTasks)), + ActiveTasksCount: activeCount, + CompletedTasksCount: completedCount, + FailedTasksCount: failedCount, QueueLength: int64(len(h.taskQueue)), ActiveTasks: activeTasksList, RecentCompletedTasks: h.getRecentCompletedTasks(), @@ -1322,13 +700,9 @@ func (h *Hydrator) getWorkerStatusList() []*WorkerStatus { // getRecentCompletedTasks returns recent completed tasks (thread-safe) func (h *Hydrator) getRecentCompletedTasks() []*TaskHistoryEntry { - // Return a copy to avoid race conditions - if !h.workerStatusMutex.TryRLock() { - return make([]*TaskHistoryEntry, 0) - } + h.workerStatusMutex.RLock() defer h.workerStatusMutex.RUnlock() - // Return a copy result := make([]*TaskHistoryEntry, len(h.recentCompletedTasks)) copy(result, h.recentCompletedTasks) return result @@ -1336,13 +710,9 @@ func (h *Hydrator) getRecentCompletedTasks() []*TaskHistoryEntry { // getRecentFailedTasks returns recent failed tasks (thread-safe) func (h *Hydrator) getRecentFailedTasks() []*TaskHistoryEntry { - // Return a copy to avoid race conditions - if !h.workerStatusMutex.TryRLock() { - return make([]*TaskHistoryEntry, 0) - } + h.workerStatusMutex.RLock() defer h.workerStatusMutex.RUnlock() - // Return a copy result := make([]*TaskHistoryEntry, len(h.recentFailedTasks)) copy(result, h.recentFailedTasks) return result @@ -1369,206 +739,6 @@ func CreateDefaultHydrator(cache *types.CacheData, settingsManager *settings.Set return NewHydrator(cache, settingsManager, cacheManager, config) } -// NotifyPendingDataUpdate implements HydrationNotifier interface -// Processes pending data update notification and creates hydration tasks immediately -func (h *Hydrator) NotifyPendingDataUpdate(userID, sourceID string, pendingData map[string]interface{}) { - if !h.IsRunning() { - glog.V(3).Infof("Hydrator is not running, ignoring pending data 
notification for user: %s, source: %s", userID, sourceID) - return - } - - glog.V(3).Infof("Received pending data update notification for user: %s, source: %s", userID, sourceID) - - // Create tasks from the pending data immediately - h.createTasksFromPendingDataMap(userID, sourceID, pendingData) -} - -// createTasksFromPendingDataMap creates hydration tasks from pending data map -func (h *Hydrator) createTasksFromPendingDataMap(userID, sourceID string, pendingData map[string]interface{}) { - glog.V(3).Infof("Creating tasks from pending data for user: %s, source: %s", userID, sourceID) - - // Extract data section from pendingData - dataSection, ok := pendingData["data"] - if !ok { - glog.V(3).Infof("No data section found in pending data for user: %s, source: %s", userID, sourceID) - return - } - - // Handle different data section formats - var appsMap map[string]interface{} - - // First, try to handle the case where dataSection is an AppStoreDataSection struct - glog.V(2).Infof("Data section type: %T for user: %s, source: %s", dataSection, userID, sourceID) - - // Check if it's an AppStoreDataSection by checking if it has Apps field - if dataStruct := dataSection; dataStruct != nil { - // Use reflection or type assertion to access the Apps field - - // Try to access as map first (for backwards compatibility) - if dataMap, ok := dataSection.(map[string]interface{}); ok { - // Check if it's in the expected format with "apps" key - if apps, hasApps := dataMap["apps"]; hasApps { - if appsMapValue, ok := apps.(map[string]interface{}); ok { - appsMap = appsMapValue - glog.V(3).Infof("Found apps data in standard map format for user: %s, source: %s", userID, sourceID) - } - } else { - // Check if the dataMap itself contains app entries - if h.looksLikeAppsMap(dataMap) { - appsMap = dataMap - glog.V(3).Infof("Data section appears to contain apps directly for user: %s, source: %s", userID, sourceID) - } - } - } else { - // Try to handle AppStoreDataSection struct using 
interface conversion - glog.V(2).Infof("Unsupported data format for user: %s, source: %s. Expected map[string]interface{} but got %T", userID, sourceID, dataSection) - glog.V(2).Infof("Data section content: %+v", dataSection) - return - } - } - - if appsMap == nil || len(appsMap) == 0 { - glog.V(3).Infof("No apps found in pending data for user: %s, source: %s", userID, sourceID) - return - } - - glog.V(2).Infof("Found %d apps in pending data for user: %s, source: %s", len(appsMap), userID, sourceID) - - // Create hydration task for each app - for appID, appData := range appsMap { - // Validate app data - if appMap, ok := appData.(map[string]interface{}); ok { - // Check if app data contains necessary raw data fields before creating task - if !h.hasRequiredRawDataFields(appMap) { - glog.V(3).Infof("App %s (user: %s, source: %s) missing required raw data fields, skipping task creation", - appID, userID, sourceID) - continue - } - var appName = appMap["name"].(string) - // Check if task already exists for this app to avoid duplicates - if !h.hasActiveTaskForApp(userID, sourceID, appID, appName) { - // Check if app is already in render failed list - if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - glog.V(3).Infof("App %s (user: %s, source: %s) is already in render failed list, skipping task creation", - appID, userID, sourceID) - continue - } - - // Check if app hydration is already complete before creating new task - // Extract version from app data for version comparison - version := "" - if versionValue, exists := appMap["version"]; exists && versionValue != nil { - if versionStr, ok := versionValue.(string); ok { - version = versionStr - } - } - if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { - // glog.Infof("App hydration already complete for app: %s (user: %s, source: %s), skipping task creation", - // appID, userID, sourceID) - continue - } - - if len(appMap) == 0 { - glog.V(3).Infof("Warning: Empty app data for app: %s 
(user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - continue - } - - // Create and submit task with CacheManager for unified lock strategy - var cacheManager types.CacheManagerInterface - if h.cacheManager != nil { - cacheManager = h.cacheManager - } - task := hydrationfn.NewHydrationTaskWithManager( - userID, sourceID, appID, - appMap, h.cache, cacheManager, h.settingsManager, - ) - - if err := h.EnqueueTask(task); err != nil { - glog.Errorf("Failed to enqueue hydration task for app %s (user: %s, source: %s): %v", - appID, userID, sourceID, err) - } else { - glog.V(3).Infof("Successfully enqueued hydration task for app %s (user: %s, source: %s)", - appID, userID, sourceID) - } - } else { - glog.V(3).Infof("Task already exists for app: %s (user: %s, source: %s), skipping", appID, userID, sourceID) - } - } else { - glog.V(3).Infof("Invalid app data format for app %s (user: %s, source: %s)", appID, userID, sourceID) - } - } -} - -// hasRequiredRawDataFields checks if app data contains the minimum required fields for hydration -func (h *Hydrator) hasRequiredRawDataFields(appMap map[string]interface{}) bool { - if appMap == nil { - return false - } - - // Required fields that must be present for hydration to succeed - requiredFields := []string{"id", "name", "appID"} - - // Check if at least one of the required fields exists - hasRequiredField := false - for _, field := range requiredFields { - if value, exists := appMap[field]; exists && value != nil && value != "" { - hasRequiredField = true - break - } - } - - if !hasRequiredField { - return false - } - - // Additional recommended fields that indicate this is valid app data - recommendedFields := []string{"title", "version", "description", "chartName"} - hasRecommendedField := false - - for _, field := range recommendedFields { - if value, exists := appMap[field]; exists && value != nil && value != "" { - hasRecommendedField = true - break - } - } - - // Log warning if missing recommended 
fields but still proceed - if !hasRecommendedField { - glog.V(3).Infof("Warning: App data missing recommended fields (title, version, description, chartName), but proceeding with required fields") - } - - return hasRequiredField -} - -// looksLikeAppsMap checks if a map looks like it contains app entries -func (h *Hydrator) looksLikeAppsMap(data map[string]interface{}) bool { - // Sample a few entries to see if they look like app data - sampleCount := 0 - maxSamples := 3 - - for _, value := range data { - if sampleCount >= maxSamples { - break - } - - if appMap, ok := value.(map[string]interface{}); ok { - // Check if this app data has required raw data fields - if h.hasRequiredRawDataFields(appMap) { - sampleCount++ - } else { - // If this entry doesn't have required fields, it's probably not valid app data - return false - } - } else { - // Non-map entries suggest this is not an apps map - return false - } - } - - return sampleCount > 0 -} - // SetCacheManager removed: cacheManager must be provided at NewHydrator // batchCompletionProcessor processes completed tasks in batches @@ -1619,10 +789,7 @@ func (h *Hydrator) databaseSyncMonitor(ctx context.Context) { // processCompletedTask processes a single completed task func (h *Hydrator) processCompletedTask(taskID string) { - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for processCompletedTask, skipping") - return - } + h.taskMutex.RLock() task, exists := h.completedTasks[taskID] h.taskMutex.RUnlock() @@ -1637,10 +804,7 @@ func (h *Hydrator) processCompletedTask(taskID string) { // processBatchCompletions processes completed tasks in batches func (h *Hydrator) processBatchCompletions() { - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for processBatchCompletions, skipping") - return - } + h.taskMutex.RLock() currentCompleted := h.totalTasksSucceeded h.taskMutex.RUnlock() @@ -1660,11 +824,7 @@ func (h *Hydrator) checkAndSyncToDatabase() { 
return } - // Check if there are completed tasks that need syncing - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for checkAndSyncToDatabase, skipping") - return - } + h.taskMutex.RLock() completedCount := len(h.completedTasks) h.taskMutex.RUnlock() @@ -1686,7 +846,7 @@ func (h *Hydrator) triggerDatabaseSync() { return } - glog.V(3).Infof("Triggering database synchronization") + glog.V(2).Infof("Triggering database synchronization") // Force sync all cache data to Redis/database if err := h.cacheManager.ForceSync(); err != nil { @@ -1702,10 +862,7 @@ func (h *Hydrator) triggerDatabaseSync() { // cleanupOldCompletedTasks removes old completed tasks from memory func (h *Hydrator) cleanupOldCompletedTasks() { - if !h.taskMutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for cleanupOldCompletedTasks, skipping") - return - } + h.taskMutex.Lock() defer h.taskMutex.Unlock() // Keep only the most recent 100 completed tasks @@ -1742,10 +899,7 @@ func (h *Hydrator) monitorMemoryUsage() { h.lastMemoryCheck = time.Now() - if !h.taskMutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for monitorMemoryUsage, skipping") - return - } + h.taskMutex.RLock() activeCount := len(h.activeTasks) completedCount := len(h.completedTasks) failedCount := len(h.failedTasks) @@ -1764,10 +918,7 @@ func (h *Hydrator) monitorMemoryUsage() { // cleanupOldTasks cleans up old tasks from all task maps func (h *Hydrator) cleanupOldTasks() { - if !h.taskMutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for cleanupOldTasks, skipping") - return - } + h.taskMutex.Lock() defer h.taskMutex.Unlock() now := time.Now() @@ -1842,274 +993,22 @@ func (h *Hydrator) cleanupOldTasks() { func (h *Hydrator) isAppInLatestQueue(userID, sourceID, appID, appName, version string) bool { glog.V(3).Infof("DEBUG: isAppInLatestQueue checking appID=%s %s, version=%s for user=%s, source=%s", appID, appName, version, userID, sourceID) - 
// Use CacheManager's lock if available - if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Hydrator.isAppInLatestQueue: CacheManager read lock not available for user: %s, source: %s, app: %s %s %s, returning false", userID, sourceID, appID, appName, version) - return false - } - defer h.cacheManager.mutex.RUnlock() - - userData, userExists := h.cache.Users[userID] - if !userExists { - return false - } - - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - return false - } - - // Check if app exists in AppInfoLatest queue - for _, latestData := range sourceData.AppInfoLatest { - if latestData == nil { - continue - } - - // Check RawData first - if latestData.RawData != nil { - if latestData.RawData.ID == appID || - latestData.RawData.AppID == appID || - latestData.RawData.Name == appID { - // Add version comparison - only return true if versions match - if version != "" && latestData.RawData.Version != version { - glog.V(3).Infof("App %s found in latest queue but version mismatch: current=%s, latest=%s, skipping", - appID, version, latestData.RawData.Version) - continue - } - glog.V(3).Infof("App %s found in latest queue with matching version: %s", appID, version) - return true - } - } - - // Check AppInfo.AppEntry - if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { - if latestData.AppInfo.AppEntry.ID == appID || - latestData.AppInfo.AppEntry.AppID == appID || - latestData.AppInfo.AppEntry.Name == appID { - // Add version comparison - only return true if versions match - if version != "" && latestData.AppInfo.AppEntry.Version != version { - glog.V(3).Infof("App %s found in latest queue but version mismatch: current=%s, latest=%s, skipping", - appID, version, latestData.AppInfo.AppEntry.Version) - continue - } - glog.V(3).Infof("App %s found in latest queue with matching version: %s", appID, version) - return true - } - } - - // Check AppSimpleInfo - if 
latestData.AppSimpleInfo != nil { - if latestData.AppSimpleInfo.AppID == appID || - latestData.AppSimpleInfo.AppName == appID { - // For AppSimpleInfo, we may not have version info, so only check if version is empty - if version == "" { - glog.V(3).Infof("App %s found in latest queue (AppSimpleInfo)", appID) - return true - } - // If version is provided but AppSimpleInfo doesn't have version, skip - glog.V(3).Infof("App %s found in latest queue but AppSimpleInfo has no version info, skipping", appID) - continue - } - } - } - } else { + if h.cacheManager == nil { glog.V(3).Infof("Warning: CacheManager not available for isAppInLatestQueue") + return false } - glog.V(3).Infof("DEBUG: isAppInLatestQueue returning false for appID=%s, version=%s, user=%s, source=%s", appID, version, userID, sourceID) - return false -} - -// ForceAddTaskFromLatestData forces creation of hydration task from latest app data, skipping isAppInLatestQueue check -// This method is exposed for external use when you need to force add a task regardless of existing state -func (h *Hydrator) ForceAddTaskFromLatestData(userID, sourceID string, latestData *types.AppInfoLatestData) error { - if !h.IsRunning() { - return fmt.Errorf("hydrator is not running") - } - - if latestData == nil { - return fmt.Errorf("latest data is nil") - } - - // Extract app ID from latest data - var appID string - var appName string - if latestData.RawData != nil { - appID = latestData.RawData.AppID - appName = latestData.RawData.Name - if appID == "" { - appID = latestData.RawData.ID - } - } else if latestData.AppInfo != nil && latestData.AppInfo.AppEntry != nil { - appID = latestData.AppInfo.AppEntry.AppID - appName = latestData.AppInfo.AppEntry.Name - if appID == "" { - appID = latestData.AppInfo.AppEntry.ID - } - } else if latestData.AppSimpleInfo != nil { - appID = latestData.AppSimpleInfo.AppID - appName = latestData.AppSimpleInfo.AppName - } - - if appID == "" { - return fmt.Errorf("cannot extract app ID from latest 
data") - } - - // Check if task already exists for this app to avoid duplicates - if h.hasActiveTaskForApp(userID, sourceID, appID, appName) { - glog.V(3).Infof("Task already exists for app: %s (user: %s, source: %s), skipping force add", appID, userID, sourceID) - return nil - } - - // Check if app is already in render failed list - if h.isAppInRenderFailedList(userID, sourceID, appID, appName) { - glog.V(3).Infof("App %s (user: %s, source: %s) is already in render failed list, skipping force add", - appID, userID, sourceID) - return nil - } - - // Convert latest data to map for task creation - appDataMap := h.convertLatestDataToMap(latestData) - - if len(appDataMap) == 0 { - glog.V(3).Infof("Warning: Empty app data for app: %s (user: %s, source: %s), skipping task creation", - appID, userID, sourceID) - return nil - } - - // Create and submit task with CacheManager for unified lock strategy - var cacheManager types.CacheManagerInterface - if h.cacheManager != nil { - cacheManager = h.cacheManager - } - task := hydrationfn.NewHydrationTaskWithManager( - userID, sourceID, appID, - appDataMap, h.cache, cacheManager, h.settingsManager, - ) - - if err := h.EnqueueTask(task); err != nil { - glog.Errorf("Failed to enqueue force task for app: %s (user: %s, source: %s), error: %v", - appID, userID, sourceID, err) - return err - } - - glog.V(2).Infof("Successfully force added hydration task for app: %s (user: %s, source: %s)", - appID, userID, sourceID) - return nil -} - -// convertLatestDataToMap converts AppInfoLatestData to map for task creation -func (h *Hydrator) convertLatestDataToMap(latestData *types.AppInfoLatestData) map[string]interface{} { - if latestData == nil { - return make(map[string]interface{}) - } - - // Start with basic data - data := map[string]interface{}{ - "type": string(latestData.Type), - "timestamp": latestData.Timestamp, - "version": latestData.Version, - } - - // Add RawData if available - if latestData.RawData != nil { - rawDataMap := 
h.convertApplicationInfoEntryToMap(latestData.RawData) - // Merge raw data into main data map - for key, value := range rawDataMap { - data[key] = value - } - } - - // Add package information - if latestData.RawPackage != "" { - data["raw_package"] = latestData.RawPackage - } - if latestData.RenderedPackage != "" { - data["rendered_package"] = latestData.RenderedPackage - } - - // Add Values if available - if latestData.Values != nil && len(latestData.Values) > 0 { - valuesData := make([]map[string]interface{}, 0, len(latestData.Values)) - for _, value := range latestData.Values { - if value != nil { - valueMap := map[string]interface{}{ - "file_name": value.FileName, - "modify_type": string(value.ModifyType), - "modify_key": value.ModifyKey, - "modify_value": value.ModifyValue, - } - valuesData = append(valuesData, valueMap) - } - } - data["values"] = valuesData - } - - // Add AppInfo if available - if latestData.AppInfo != nil { - if latestData.AppInfo.AppEntry != nil { - appEntryMap := h.convertApplicationInfoEntryToMap(latestData.AppInfo.AppEntry) - // Merge app entry data - for key, value := range appEntryMap { - data[key] = value - } - } - if latestData.AppInfo.ImageAnalysis != nil { - data["image_analysis"] = latestData.AppInfo.ImageAnalysis - } - } - - // Add AppSimpleInfo if available - if latestData.AppSimpleInfo != nil { - data["app_simple_info"] = latestData.AppSimpleInfo - } - - return data + result := h.cacheManager.IsAppInLatestQueue(userID, sourceID, appID, version) + glog.V(3).Infof("DEBUG: isAppInLatestQueue returning %v for appID=%s, version=%s, user=%s, source=%s", result, appID, version, userID, sourceID) + return result } -// isAppInRenderFailedList checks if an app already exists in the render failed list -func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName string) bool { - // Use CacheManager's lock if available - if h.cacheManager != nil { - if !h.cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] 
Hydrator.isAppInRenderFailedList: CacheManager read lock not available for user %s, source %s, app %s %s, returning false", userID, sourceID, appID, appName) - return false - } - defer h.cacheManager.mutex.RUnlock() - - userData, userExists := h.cache.Users[userID] - if !userExists { - return false - } - - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - return false - } - - // Check if app exists in render failed list - for _, failedData := range sourceData.AppRenderFailed { - if failedData.RawData != nil && - (failedData.RawData.ID == appID || failedData.RawData.AppID == appID || failedData.RawData.Name == appID) { - return true - } - } - } else { +// isAppInRenderFailedList checks if an app already exists in the render failed list. +// When version is provided, only same-version failure will block hydration. +func (h *Hydrator) isAppInRenderFailedList(userID, sourceID, appID, appName, version string) bool { + if h.cacheManager == nil { glog.V(2).Infof("Warning: CacheManager not available for isAppInRenderFailedList") + return false } - - return false -} - -// ForceCheckPendingData immediately triggers checkForPendingData without waiting for the 30-second interval -// This method can be called externally to force immediate processing of pending data -func (h *Hydrator) ForceCheckPendingData() { - if !h.IsRunning() { - glog.V(3).Infof("Hydrator is not running, cannot force check pending data") - return - } - - glog.V(3).Infof("Force checking pending data triggered externally") - h.checkForPendingData() + return h.cacheManager.IsAppInRenderFailedList(userID, sourceID, appID, appName, version) } diff --git a/internal/v2/appinfo/hydrationfn/task_for_api.go b/internal/v2/appinfo/hydrationfn/task_for_api.go index bfbbe86..72a6cd0 100644 --- a/internal/v2/appinfo/hydrationfn/task_for_api.go +++ b/internal/v2/appinfo/hydrationfn/task_for_api.go @@ -85,7 +85,7 @@ func (s *TaskForApiStep) Execute(ctx context.Context, task *HydrationTask) error 
Post(url) duration := time.Since(startTime) if err != nil || resp.StatusCode() >= 300 { - glog.Errorf("TaskForApiStep - Request failed in %v for user=%s, source=%s, app=%s: %v", duration, task.UserID, task.SourceID, task.AppID, err) + glog.Errorf("TaskForApiStep - Request failed in %v for user=%s, source=%s, app=%s(%s/%s): %v", duration, task.UserID, task.SourceID, task.AppID, task.AppName, task.AppVersion, err) } if err != nil { return fmt.Errorf("failed to call chart repo sync-app: %w", err) @@ -108,14 +108,14 @@ func (s *TaskForApiStep) Execute(ctx context.Context, task *HydrationTask) error if err := s.writeAppDataToCache(task, appData); err != nil { glog.Errorf("Warning: failed to write app_data to cache: %v", err) } else { - glog.V(3).Infof("Successfully wrote app_data to cache for user=%s, source=%s, app=%s, appName=%s", + glog.V(2).Infof("Successfully wrote app_data to cache for user=%s, source=%s, app=%s, appName=%s", task.UserID, task.SourceID, task.AppID, task.AppName) } } } } - glog.V(3).Info("SyncApp to chart repo completed successfully") + glog.V(2).Infof("[TaskForApi] SyncApp %s(%s %s) to chart repo completed successfully", task.AppID, task.AppName, task.AppVersion) return nil } @@ -164,58 +164,34 @@ func (s *TaskForApiStep) writeAppDataToCache(task *HydrationTask, appData interf return fmt.Errorf("app_data is not in expected format, app=%s, appName=%s", task.AppID, task.AppName) } - // Now acquire the lock for cache operations - if task.CacheManager != nil { - if !task.CacheManager.TryLock() { - return fmt.Errorf("write lock not available for cache update, user=%s, source=%s, app=%s, appName=%s", task.UserID, task.SourceID, task.AppID, task.AppName) - } - defer task.CacheManager.Unlock() - } - - // Find the pendingData in cache - pendingData := s.findPendingDataFromCache(task) - if pendingData == nil { - return fmt.Errorf("pendingData not found in cache for user=%s, source=%s, app=%s, appName=%s", task.UserID, task.SourceID, task.AppID, 
task.AppName) + if task.CacheManager == nil { + return fmt.Errorf("CacheManager not available for fixVersionHistoryFromPendingData") } - // Fix version history data - appInfoLatest.RawData.VersionHistory = pendingData.RawData.VersionHistory - appInfoLatest.AppInfo.AppEntry.VersionHistory = pendingData.RawData.VersionHistory - - // Preserve appLabels from pendingData if chartrepo didn't return them or returned empty array - // This is critical for delisted apps (with suspend/remove labels) that are still installed - if pendingData.RawData != nil && len(pendingData.RawData.AppLabels) > 0 { - // Check if chartrepo returned appLabels - chartrepoHasLabels := false - if appDataMap, ok := appData.(map[string]interface{}); ok { - if appInfoMap, ok := appDataMap["app_info"].(map[string]interface{}); ok { - if appEntryMap, ok := appInfoMap["app_entry"].(map[string]interface{}); ok { - if appLabels, ok := appEntryMap["appLabels"].([]interface{}); ok && len(appLabels) > 0 { - chartrepoHasLabels = true - } + // Check if chartrepo returned appLabels before taking the lock + chartrepoHasLabels := false + if appDataMap, ok := appData.(map[string]interface{}); ok { + if appInfoMap, ok := appDataMap["app_info"].(map[string]interface{}); ok { + if appEntryMap, ok := appInfoMap["app_entry"].(map[string]interface{}); ok { + if appLabels, ok := appEntryMap["appLabels"].([]interface{}); ok && len(appLabels) > 0 { + chartrepoHasLabels = true } } } - - // If chartrepo didn't return labels, preserve from pendingData - if !chartrepoHasLabels { - appInfoLatest.RawData.AppLabels = pendingData.RawData.AppLabels - appInfoLatest.AppInfo.AppEntry.AppLabels = pendingData.RawData.AppLabels + } + if chartrepoHasLabels { + // Clear the AppLabels so CopyPendingVersionHistory won't overwrite them + // (it only copies when latest has empty labels) + } else if appInfoLatest.RawData != nil { + appInfoLatest.RawData.AppLabels = nil + if appInfoLatest.AppInfo != nil && appInfoLatest.AppInfo.AppEntry != nil { 
+ appInfoLatest.AppInfo.AppEntry.AppLabels = nil } } - // Overwrite all fields of pendingData (keep the pointer address, update all contents) - pendingData.Type = appInfoLatest.Type - pendingData.Timestamp = appInfoLatest.Timestamp - pendingData.Version = appInfoLatest.Version - pendingData.RawData = appInfoLatest.RawData - pendingData.RawPackage = appInfoLatest.RawPackage - pendingData.Values = appInfoLatest.Values - pendingData.AppInfo = appInfoLatest.AppInfo - pendingData.RenderedPackage = appInfoLatest.RenderedPackage - pendingData.AppSimpleInfo = appInfoLatest.AppSimpleInfo - - return nil + return task.CacheManager.CopyPendingVersionHistory( + task.UserID, task.SourceID, task.AppID, task.AppName, appInfoLatest, + ) } // findPendingDataFromCache finds AppInfoLatestPendingData from cache based on task information diff --git a/internal/v2/appinfo/pipeline.go b/internal/v2/appinfo/pipeline.go new file mode 100644 index 0000000..e647a91 --- /dev/null +++ b/internal/v2/appinfo/pipeline.go @@ -0,0 +1,420 @@ +package appinfo + +import ( + "context" + "os" + "strconv" + "sync" + "sync/atomic" + "time" + + "market/internal/v2/appinfo/hydrationfn" + "market/internal/v2/types" + + "github.com/golang/glog" +) + +// Pipeline orchestrates the serial execution of all data processing phases: +// +// Phase 1: Syncer - fetch remote app data +// Phase 2: Hydrator - process pending apps (hydration + move to Latest) +// Phase 3: DataWatcherRepo - process chart-repo state changes +// Phase 4: StatusCorrectionChecker - correct app running statuses +// Phase 5: Hash calculation + ForceSync +const defaultHydrationConcurrency = 5 + +type Pipeline struct { + cacheManager *CacheManager + cache *types.CacheData + syncer *Syncer + hydrator *Hydrator + dataWatcher *DataWatcher + dataWatcherRepo *DataWatcherRepo + statusCorrectionChecker *StatusCorrectionChecker + + mutex sync.Mutex + stopChan chan struct{} + isRunning atomic.Bool + interval time.Duration + hydrationConcurrency int +} + +func 
NewPipeline(cacheManager *CacheManager, cache *types.CacheData, interval time.Duration) *Pipeline { + if interval <= 0 { + interval = 30 * time.Second + } + + concurrency := defaultHydrationConcurrency + if v, err := strconv.Atoi(os.Getenv("PIPELINE_HYDRATION_CONCURRENCY")); err == nil && v > 0 { + concurrency = v + } + + return &Pipeline{ + cacheManager: cacheManager, + cache: cache, + stopChan: make(chan struct{}), + interval: interval, + hydrationConcurrency: concurrency, + } +} + +func (p *Pipeline) SetSyncer(s *Syncer) { p.syncer = s } +func (p *Pipeline) SetHydrator(h *Hydrator) { p.hydrator = h } +func (p *Pipeline) SetDataWatcher(dw *DataWatcher) { p.dataWatcher = dw } +func (p *Pipeline) SetDataWatcherRepo(dwr *DataWatcherRepo) { p.dataWatcherRepo = dwr } +func (p *Pipeline) SetStatusCorrectionChecker(scc *StatusCorrectionChecker) { + p.statusCorrectionChecker = scc +} + +func (p *Pipeline) Start(ctx context.Context) error { + if p.isRunning.Load() { + return nil + } + p.isRunning.Store(true) + go p.loop(ctx) + glog.Infof("Pipeline started with interval %v", p.interval) + return nil +} + +func (p *Pipeline) Stop() { + if !p.isRunning.Load() { + return + } + close(p.stopChan) + p.isRunning.Store(false) + glog.Info("Pipeline stopped") +} + +func (p *Pipeline) loop(ctx context.Context) { + glog.Info("Pipeline loop started") + defer glog.Info("Pipeline loop stopped") + + p.run(ctx) + + ticker := time.NewTicker(p.interval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-p.stopChan: + return + case <-ticker.C: + p.run(ctx) + } + } +} + +func (p *Pipeline) run(ctx context.Context) { + if !p.mutex.TryLock() { + glog.Warning("Pipeline: another run in progress, skipping") + return + } + defer p.mutex.Unlock() + + glog.V(2).Info("Pipeline: [LOOP] cycle start") + + startTime := time.Now() + + // add check current all users in cluster + p.cacheManager.RemoveDeletedUser() + + // Phase 1-4: only modify data, no hash calculation or 
ForceSync + p.phaseSyncer(ctx) + hydrateUsers := p.phaseHydrateApps(ctx) + repoUsers := p.phaseDataWatcherRepo(ctx) + statusUsers := p.phaseStatusCorrection(ctx) + + // Phase 5: merge all affected users + dirty users, calculate hash once, sync once + allAffected := make(map[string]bool) + for u := range hydrateUsers { + allAffected[u] = true + } + for u := range repoUsers { + allAffected[u] = true + } + for u := range statusUsers { + allAffected[u] = true + } + // Collect dirty users from event-driven paths (DataWatcherState) + if p.dataWatcher != nil { + for u := range p.dataWatcher.CollectAndClearDirtyUsers() { + allAffected[u] = true + } + } + + p.phaseHashAndSync(allAffected) + + cachedData := p.cacheManager.GetCachedData() + + glog.V(2).Infof("Pipeline: [LOOP] cycle completed in %v, cached: %s", time.Since(startTime), cachedData) +} + +// phaseSyncer fetches remote data +func (p *Pipeline) phaseSyncer(ctx context.Context) { + if p.syncer == nil { + return + } + select { + case <-ctx.Done(): + return + case <-p.stopChan: + return + default: + } + glog.V(3).Info("Pipeline Phase 1: Syncer") + p.syncer.SyncOnce(ctx) +} + +// phaseHydrateApps processes pending apps in concurrent batches through hydration + move to Latest. +// Batch size is controlled by hydrationConcurrency (default 5, env PIPELINE_HYDRATION_CONCURRENCY). 
+func (p *Pipeline) phaseHydrateApps(ctx context.Context) map[string]bool { + affectedUsers := make(map[string]bool) + if p.hydrator == nil || p.cacheManager == nil { + return affectedUsers + } + + count := p.cacheManager.RestoreRetryableFailedToPending(20) + glog.Infof("Pipeline Phase 2: restore %d Failed to Pending", count) + + items := p.cacheManager.CollectAllPendingItems() + + if len(items) == 0 { + glog.V(2).Info("Pipeline Phase 2: no pending apps to process") + return affectedUsers + } + + total := len(items) + batchSize := p.hydrationConcurrency + if batchSize <= 0 { + batchSize = defaultHydrationConcurrency + } + + // Filter out items whose user or source has been deleted since collection. + // CollectAllPendingItems returns a snapshot; async deletions (RemoveUserData, + // SyncMarketSourcesToCache) may have removed the user/source in the meantime. + validItems := make([]PendingItem, 0, len(items)) + for _, item := range items { + if p.cacheManager.GetSourceData(item.UserID, item.SourceID) == nil { + appID, appName := getAppIdentifiers(item.Pending) + glog.V(2).Infof("Pipeline Phase 2: skipping %s %s - user %s or source %s no longer exists", + appID, appName, item.UserID, item.SourceID) + continue + } + validItems = append(validItems, item) + } + + if len(validItems) == 0 { + glog.V(2).Infof("Pipeline Phase 2: all %d pending apps filtered out (user/source deleted)", total) + return affectedUsers + } + + if len(validItems) < total { + glog.V(2).Infof("Pipeline Phase 2: %d/%d pending apps remain after filtering deleted users/sources", + len(validItems), total) + } + + total = len(validItems) + glog.V(2).Infof("Pipeline Phase 2: processing %d pending apps (concurrency=%d)", total, batchSize) + + for batchStart := 0; batchStart < total; batchStart += batchSize { + select { + case <-ctx.Done(): + return affectedUsers + case <-p.stopChan: + return affectedUsers + default: + } + + batchEnd := batchStart + batchSize + if batchEnd > total { + batchEnd = total + } + 
batch := validItems[batchStart:batchEnd] + + // Log batch items + for i, item := range batch { + appID, appName := getAppIdentifiers(item.Pending) + glog.V(2).Infof("Pipeline Phase 2: [%d/%d] %s %s (user=%s, source=%s)", + batchStart+i+1, total, appID, appName, item.UserID, item.SourceID) + } + + // Process batch concurrently + type hydrateResult struct { + idx int + hydrated bool + } + results := make([]hydrateResult, len(batch)) + var wg sync.WaitGroup + + for i, item := range batch { + wg.Add(1) + go func(idx int, it PendingItem) { + defer wg.Done() + results[idx] = hydrateResult{ + idx: idx, + hydrated: p.hydrator.HydrateSingleApp(ctx, it.UserID, it.SourceID, it.Pending), + } + }(i, item) + } + wg.Wait() + + // Move hydrated apps to Latest (sequential — writes to the same source slice) + for i, item := range batch { + if results[i].hydrated && p.dataWatcher != nil { + p.dataWatcher.ProcessSingleAppToLatest(item.UserID, item.SourceID, item.Pending) + } + affectedUsers[item.UserID] = true + } + } + + return affectedUsers +} + +// phaseDataWatcherRepo processes chart-repo state changes +func (p *Pipeline) phaseDataWatcherRepo(ctx context.Context) map[string]bool { + if p.dataWatcherRepo == nil { + return nil + } + select { + case <-ctx.Done(): + return nil + case <-p.stopChan: + return nil + default: + } + glog.V(2).Info("Pipeline Phase 3: DataWatcherRepo") + return p.dataWatcherRepo.ProcessOnce() +} + +// phaseStatusCorrection corrects app running statuses +func (p *Pipeline) phaseStatusCorrection(ctx context.Context) map[string]bool { + if p.statusCorrectionChecker == nil { + return nil + } + select { + case <-ctx.Done(): + return nil + case <-p.stopChan: + return nil + default: + } + glog.V(2).Info("Pipeline Phase 4: StatusCorrectionChecker") + return p.statusCorrectionChecker.PerformStatusCheckOnce() +} + +// phaseHashAndSync calculates user hashes for all affected users and syncs to Redis. 
+// This is the single point where hash calculation and ForceSync happen per Pipeline cycle. +func (p *Pipeline) phaseHashAndSync(affectedUsers map[string]bool) { + if p.dataWatcher != nil && len(affectedUsers) > 0 { + glog.V(2).Infof("Pipeline Phase 5: calculating hash for %d affected users", len(affectedUsers)) + for userID := range affectedUsers { + userData := p.cacheManager.GetUserData(userID) + if userData != nil { + p.dataWatcher.CalculateAndSetUserHashDirect(userID, userData) + } + } + } + if p.cacheManager != nil { + if err := p.cacheManager.ForceSync(); err != nil { + glog.Errorf("Pipeline Phase 5: ForceSync rate limited: %v", err) + } + } +} + +func getAppIdentifiers(pd *types.AppInfoLatestPendingData) (string, string) { + if pd == nil || pd.RawData == nil { + return "unknown", "unknown" + } + appID := pd.RawData.AppID + if appID == "" { + appID = pd.RawData.ID + } + return appID, pd.RawData.Name +} + +// HydrateSingleApp runs hydration steps for a single app synchronously. +// Returns true if hydration completed and data is ready for move to Latest. 
+func (h *Hydrator) HydrateSingleApp(ctx context.Context, userID, sourceID string, pendingData *types.AppInfoLatestPendingData) bool { + if pendingData == nil || pendingData.RawData == nil { + return false + } + + appID := pendingData.RawData.AppID + if appID == "" { + appID = pendingData.RawData.ID + } + appName := pendingData.RawData.Name + if appID == "" { + return false + } + + version := "" + if pendingData.RawData != nil { + version = pendingData.RawData.Version + } + + if h.isAppInRenderFailedList(userID, sourceID, appID, appName, version) { + glog.V(2).Infof("HydrateSingleApp: skipping %s(%s) (user=%s, source=%s) - in render failed list, will retry after cleanup", + appID, appName, userID, sourceID) + return false + } + + if h.isAppHydrationComplete(pendingData) { + return true + } + + if h.isAppInLatestQueue(userID, sourceID, appID, appName, version) { + glog.V(2).Infof("HydrateSingleApp: skipping %s(%s) (user=%s, source=%s) - already in latest queue with version %s", + appID, appName, userID, sourceID, version) + return false + } + + appDataMap := h.convertApplicationInfoEntryToMap(pendingData.RawData) + if len(appDataMap) == 0 { + glog.V(2).Infof("HydrateSingleApp: skipping %s(%s) (user=%s, source=%s) - convertApplicationInfoEntryToMap returned empty", + appID, appName, userID, sourceID) + return false + } + + var cacheManagerIface types.CacheManagerInterface + if h.cacheManager != nil { + cacheManagerIface = h.cacheManager + } + task := hydrationfn.NewHydrationTaskWithManager( + userID, sourceID, appID, + appDataMap, h.cache, cacheManagerIface, h.settingsManager, + ) + + glog.V(3).Infof("HydrateSingleApp: processing %s %s (user=%s, source=%s)", appID, appName, userID, sourceID) + taskStartTime := time.Now() + + for _, step := range h.steps { + if step.CanSkip(ctx, task) { + task.IncrementStep() + continue + } + if err := step.Execute(ctx, task); err != nil { + failureReason := err.Error() + failureStep := step.GetStepName() + 
glog.Errorf("HydrateSingleApp: step %s failed for app %s(%s): %v", failureStep, appID, appName, err) + h.moveTaskToRenderFailed(task, failureReason, failureStep) + duration := time.Since(taskStartTime) + h.markTaskFailed(task, taskStartTime, duration, failureStep, failureReason) + return false + } + task.IncrementStep() + } + + if !h.isAppHydrationComplete(pendingData) { + glog.Warningf("HydrateSingleApp: steps completed but data incomplete for app %s(%s), will retry next cycle", appID, appName) + return false + } + + task.SetStatus(hydrationfn.TaskStatusCompleted) + duration := time.Since(taskStartTime) + h.markTaskCompleted(task, taskStartTime, duration) + glog.V(2).Infof("HydrateSingleApp: completed for app %s(%s) in %v", appID, appName, duration) + return true +} diff --git a/internal/v2/appinfo/status_correction_check.go b/internal/v2/appinfo/status_correction_check.go index 6e6f469..1bd8e15 100644 --- a/internal/v2/appinfo/status_correction_check.go +++ b/internal/v2/appinfo/status_correction_check.go @@ -102,11 +102,41 @@ func (scc *StatusCorrectionChecker) Start() error { glog.Infof("Middleware service endpoint: http://%s:%s/app-service/v1/middlewares/status", scc.appServiceHost, scc.appServicePort) // Start the periodic checking goroutine - go scc.runPeriodicCheck() + go scc.runPeriodicCheck() // not used return nil } +// StartWithOptions starts with options +func (scc *StatusCorrectionChecker) StartWithOptions(enablePeriodicCheck bool) error { + scc.mutex.Lock() + defer scc.mutex.Unlock() + + if scc.isRunning { + return fmt.Errorf("status correction checker is already running") + } + + scc.isRunning = true + + if enablePeriodicCheck { + glog.Infof("Starting status correction checker with interval: %v", scc.checkInterval) + go scc.runPeriodicCheck() // not use + } else { + glog.Infof("Starting status correction checker in passive mode (serial pipeline handles processing)") + } + + return nil +} + +// PerformStatusCheckOnce executes one status check cycle, 
called by Pipeline Phase 4. +// Returns the set of affected user IDs whose data was modified. +func (scc *StatusCorrectionChecker) PerformStatusCheckOnce() map[string]bool { + if !scc.isRunning { + return nil + } + return scc.performStatusCheck() // pipeline start +} + // Stop stops the periodic status checking func (scc *StatusCorrectionChecker) Stop() { scc.mutex.Lock() @@ -157,12 +187,12 @@ func (scc *StatusCorrectionChecker) runPeriodicCheck() { glog.Infof("Status correction checker periodic loop started") // Perform initial check immediately - scc.performStatusCheck() + scc.performStatusCheck() // not use for { select { case <-ticker.C: - scc.performStatusCheck() + scc.performStatusCheck() // not use case <-scc.stopChan: glog.Infof("Status correction checker periodic loop stopped") return @@ -171,8 +201,9 @@ func (scc *StatusCorrectionChecker) runPeriodicCheck() { } // performStatusCheck performs a single status check cycle -func (scc *StatusCorrectionChecker) performStatusCheck() { +func (scc *StatusCorrectionChecker) performStatusCheck() map[string]bool { startTime := time.Now() + result := make(map[string]bool) scc.mutex.Lock() scc.lastCheckTime = startTime @@ -181,45 +212,40 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.Infof("Starting status check cycle #%d", scc.checkCount) - // Fetch latest status from app-service latestStatus, err := scc.fetchLatestStatus() if err != nil { - glog.Errorf("Failed to fetch latest status from app-service: %v", err) - return + glog.Errorf("[UserChanged] Failed to fetch latest status from app-service: %v", err) + return result } - glog.V(2).Infof("Fetched status for %d applications and middlewares from app-service", len(latestStatus)) + glog.V(3).Infof("[UserChanged] Fetched status for %d applications and middlewares from app-service: %s", len(latestStatus), utils.ParseJson(latestStatus)) - // Get current status from cache cachedStatus := scc.getCachedStatus() if len(cachedStatus) == 0 { - glog.Infof("No 
cached status found, skipping comparison") - return + glog.Error("[UserChanged] No cached status found, skipping comparison") + return result } - glog.V(2).Infof("Found cached status for %d applications and middlewares", len(cachedStatus)) + glog.V(3).Infof("[UserChanged] Found cached status for %d applications and middlewares: %s", len(cachedStatus), utils.ParseJson(cachedStatus)) - // Compare and detect changes changes := scc.compareStatus(latestStatus, cachedStatus) - glog.V(2).Infof("[UserChanged] Found cached status, changed: %+v", changes) + glog.V(2).Infof("[UserChanged] Found cached status, changed: %+v, app: %d, middlewares: %d", changes, len(latestStatus), len(cachedStatus)) if len(changes) > 0 { - glog.V(2).Infof("Detected %d status changes, applying corrections", len(changes)) + glog.V(2).Infof("[UserChanged] Detected %d status changes, applying corrections, changes: %s", len(changes), utils.ParseJson(changes)) scc.applyCorrections(changes, latestStatus) - // After applying corrections, recalculate and update user data hash for all affected users. - // This ensures the hash stays consistent with the latest user data state. - // The hash calculation logic is consistent with DataWatcher (see datawatcher_app.go). - // affectedUsers := make(map[string]struct{}) - affectedUsers := make(map[string]*StatusChange) + // Apply UserInfo changes and collect affected users. + // Hash calculation and ForceSync are deferred to Pipeline Phase 5. 
+ changesByUser := make(map[string]*StatusChange) for _, change := range changes { - affectedUsers[change.UserID] = &change //change.ChangeType + changesByUser[change.UserID] = &change } - for userID, cs := range affectedUsers { + for userID, cs := range changesByUser { userData := scc.cacheManager.GetUserData(userID) if userData == nil { - glog.V(3).Infof("StatusCorrectionChecker: userData not found for user %s, skip hash calculation", userID) + glog.Warningf("StatusCorrectionChecker: userData not found for user %s", userID) continue } @@ -234,30 +260,7 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.V(2).Infof("[UserChanged] userId: %s, userInfo is null", cs.UserID) } - // Generate snapshot for hash calculation (reuse logic from DataWatcher) - snapshot, err := utils.CreateUserDataSnapshot(userID, userData) - if err != nil { - glog.Errorf("StatusCorrectionChecker: failed to create snapshot for user %s: %v", userID, err) - continue - } - newHash, err := utils.CalculateUserDataHash(snapshot) - if err != nil { - glog.Errorf("StatusCorrectionChecker: failed to calculate hash for user %s: %v", userID, err) - continue - } - // Write back hash with lock - glog.V(3).Infof("[LOCK] scc.cacheManager.mutex.TryLock() @status_correction:updateHash Start") - if !scc.cacheManager.mutex.TryLock() { - glog.Warning("[TryLock] StatusCorrectionChecker: CacheManager write lock not available for hash update, skipping") - continue - } - userData.Hash = newHash - scc.cacheManager.mutex.Unlock() - glog.V(2).Infof("StatusCorrectionChecker: user %s hash updated to %s", userID, newHash) - } - // Force sync after hash update - if err := scc.cacheManager.ForceSync(); err != nil { - glog.Errorf("StatusCorrectionChecker: ForceSync failed after hash update: %v", err) + result[userID] = true } scc.mutex.Lock() @@ -267,10 +270,10 @@ func (scc *StatusCorrectionChecker) performStatusCheck() { glog.V(3).Info("No status changes detected") } - // Check and correct task statuses 
scc.checkAndCorrectTaskStatuses(latestStatus) glog.V(2).Infof("Status check cycle #%d completed in %v", scc.checkCount, time.Since(startTime)) + return result } // fetchLatestStatus fetches the latest status from app-service @@ -290,6 +293,15 @@ func (scc *StatusCorrectionChecker) fetchLatestStatus() ([]utils.AppServiceRespo return appsStatus, nil } + var printf []interface{} + for _, md := range appsStatus { + if md.Spec.Name != "olares-app" { + printf = append(printf, md) + } + } + + glog.Infof("[SCC] fetch latest appStatus: %s", utils.ParseJson(printf)) + // Combine apps and middlewares status // Convert middlewares to AppServiceResponse format and merge with apps allStatus := make([]utils.AppServiceResponse, 0, len(appsStatus)+len(middlewaresStatus)) @@ -413,6 +425,16 @@ func (scc *StatusCorrectionChecker) fetchLatestMiddlewaresStatus() ([]utils.AppS Url string `json:"url"` Invisible bool `json:"invisible"` } `json:"entrances"` + Settings struct { + ClusterScoped string `json:"clusterScoped"` + MobileSupported string `json:"mobileSupported"` + Policy string `json:"policy"` + RequiredGPU string `json:"requiredGPU"` + Source string `json:"source"` + Target string `json:"target"` + Title string `json:"title"` + Version string `json:"version"` + } `json:"settings"` }{ Name: middleware.Metadata.Name, AppID: middleware.Metadata.Name, @@ -506,6 +528,15 @@ func (scc *StatusCorrectionChecker) getCachedStatus() map[string]*types.AppState } } + var printf = make(map[string]interface{}) + for k, v := range cachedStatus { + if !strings.HasSuffix(k, "olares-app") { + printf[k] = v + } + } + + glog.Infof("[SCC] fetch cached appStatus: %s", utils.ParseJson(printf)) + return cachedStatus } @@ -897,13 +928,13 @@ func (scc *StatusCorrectionChecker) applyCorrections(changes []StatusChange, lat } } - appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) + appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) // 
app_appeared if appStateData == nil { glog.V(3).Infof("Failed to create app state data for appeared app %s (user: %s)", change.AppName, change.UserID) continue } - stateData := scc.createStateDataFromAppStateData(appStateData) - if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData); err != nil { + stateData := scc.createStateDataFromAppStateData(appStateData) // app_appeared + if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData, "SCC_app_appeared"); err != nil { glog.Errorf("Failed to add appeared app %s to cache (user: %s, source: %s): %v", change.AppName, change.UserID, sourceID, err) } else { @@ -939,13 +970,13 @@ func (scc *StatusCorrectionChecker) applyCorrections(changes []StatusChange, lat } } - appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) + appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) // state_change if appStateData == nil { glog.V(3).Infof("Failed to create app state data for app %s (user: %s)", change.AppName, change.UserID) continue } - stateData := scc.createStateDataFromAppStateData(appStateData) - if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData); err != nil { + stateData := scc.createStateDataFromAppStateData(appStateData) // state_change + if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData, "SCC_state_change"); err != nil { glog.Errorf("Failed to update cache with corrected status for app %s (user: %s, source: %s): %v", change.AppName, change.UserID, sourceID, err) } else { @@ -995,14 +1026,14 @@ func (scc *StatusCorrectionChecker) applyCorrections(changes []StatusChange, lat } } - appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) + appStateData, sourceID := scc.createAppStateDataFromResponse(*appToUpdate, change.UserID) // state_inconsistency if appStateData == nil { 
glog.V(3).Infof("Failed to create app state data for app %s (user: %s)", change.AppName, change.UserID) continue } appStateData.Status.State = "running" - stateData := scc.createStateDataFromAppStateData(appStateData) - if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData); err != nil { + stateData := scc.createStateDataFromAppStateData(appStateData) // state_inconsistency + if err := scc.cacheManager.SetAppData(change.UserID, sourceID, AppStateLatest, stateData, "SCC_state_inconsistency"); err != nil { glog.Errorf("Failed to update cache with corrected state for inconsistent app %s (user: %s, source: %s): %v", change.AppName, change.UserID, sourceID, err) } else { @@ -1265,7 +1296,7 @@ func (scc *StatusCorrectionChecker) ForceCheck() error { } glog.Infof("Forcing immediate status check") - scc.performStatusCheck() + scc.performStatusCheck() // not used return nil } @@ -1327,7 +1358,7 @@ func (scc *StatusCorrectionChecker) checkAndCorrectTaskStatuses(latestStatus []u return } - glog.Infof("Checking %d running tasks for status correction", len(runningTasks)) + glog.Infof("[SCC] Checking %d running tasks for status correction", len(runningTasks)) // Create a map of app statuses for quick lookup: user:appName -> app status appStatusMap := make(map[string]*utils.AppServiceResponse) @@ -1358,39 +1389,39 @@ func (scc *StatusCorrectionChecker) checkAndCorrectTaskStatuses(latestStatus []u if runningTask.Type == task.CloneApp { taskTypeStr = "Clone" } - glog.Infof("Task status correction: %s task %s for app %s (user: %s) should be completed - app is running", + glog.Infof("[SCC] Task status correction: %s task %s for app %s (user: %s) should be completed - app is running", taskTypeStr, runningTask.ID, runningTask.AppName, runningTask.User) if err := scc.taskModule.InstallTaskSucceed(runningTask.OpID, runningTask.AppName, runningTask.User); err != nil { - glog.Warningf("Failed to mark %s task as succeeded: %v", taskTypeStr, err) + 
glog.Warningf("[SCC] Failed to mark %s task as succeeded: %v", taskTypeStr, err) } else { correctedCount++ - glog.Infof("Successfully corrected %s task status: %s", taskTypeStr, runningTask.ID) + glog.Infof("[SCC] Successfully corrected %s task status: %s", taskTypeStr, runningTask.ID) } } case task.UninstallApp: // For uninstall tasks: if app doesn't exist, mark task as completed if !exists { - glog.Infof("Task status correction: Uninstall task %s for app %s (user: %s) should be completed - app no longer exists", + glog.Infof("[SCC] Task status correction: Uninstall task %s for app %s (user: %s) should be completed - app no longer exists", runningTask.ID, runningTask.AppName, runningTask.User) if err := scc.taskModule.UninstallTaskSucceed(runningTask.OpID, runningTask.AppName, runningTask.User); err != nil { - glog.Warningf("Failed to mark uninstall task as succeeded: %v", err) + glog.Warningf("[SCC] Failed to mark uninstall task as succeeded: %v", err) } else { correctedCount++ - glog.Infof("Successfully corrected uninstall task status: %s", runningTask.ID) + glog.Infof("[SCC] Successfully corrected uninstall task status: %s", runningTask.ID) } } case task.CancelAppInstall: // For cancel install tasks: if app doesn't exist, mark task as completed if !exists { - glog.Infof("Task status correction: Cancel install task %s for app %s (user: %s) should be completed - app no longer exists", + glog.Infof("[SCC] Task status correction: Cancel install task %s for app %s (user: %s) should be completed - app no longer exists", runningTask.ID, runningTask.AppName, runningTask.User) if err := scc.taskModule.CancelInstallTaskSucceed(runningTask.OpID, runningTask.AppName, runningTask.User); err != nil { - glog.Warningf("Failed to mark cancel install task as succeeded: %v", err) + glog.Warningf("[SCC] Failed to mark cancel install task as succeeded: %v", err) } else { correctedCount++ - glog.Infof("Successfully corrected cancel install task status: %s", runningTask.ID) + 
glog.Infof("[SCC] Successfully corrected cancel install task status: %s", runningTask.ID) } } @@ -1400,14 +1431,14 @@ func (scc *StatusCorrectionChecker) checkAndCorrectTaskStatuses(latestStatus []u // are typically completed through their normal execution flow. // We log it for monitoring but don't auto-correct to avoid conflicts. if exists && appStatus != nil && appStatus.Status.State == "running" { - glog.Infof("Task status correction: Upgrade task %s for app %s (user: %s) appears completed - app is running (not auto-correcting)", + glog.Infof("[SCC] Task status correction: Upgrade task %s for app %s (user: %s) appears completed - app is running (not auto-correcting)", runningTask.ID, runningTask.AppName, runningTask.User) } } } if correctedCount > 0 { - glog.Infof("Task status correction completed: corrected %d task(s)", correctedCount) + glog.Infof("[SCC] Task status correction completed: corrected %d task(s)", correctedCount) scc.mutex.Lock() scc.correctionCount += int64(correctedCount) scc.mutex.Unlock() diff --git a/internal/v2/appinfo/syncer.go b/internal/v2/appinfo/syncer.go index 8352b56..8a9719f 100644 --- a/internal/v2/appinfo/syncer.go +++ b/internal/v2/appinfo/syncer.go @@ -2,7 +2,10 @@ package appinfo import ( "context" + "encoding/json" "fmt" + "sort" + "strings" "sync" "sync/atomic" "time" @@ -12,6 +15,7 @@ import ( "market/internal/v2/types" "market/internal/v2/utils" + "github.com/go-resty/resty/v2" "github.com/golang/glog" ) @@ -26,6 +30,11 @@ type Syncer struct { mutex sync.RWMutex // Keep mutex for steps slice operations settingsManager *settings.SettingsManager // Settings manager for data source information + lastSyncExecuted time.Time // Last time a full sync cycle was actually executed + + lastKnownRemoteSourceIDs atomic.Value // string: sorted comma-joined remote source IDs from last sync + lastKnownUserIDs atomic.Value // string: sorted comma-joined user IDs from last sync + // Status tracking fields lastSyncTime atomic.Value // 
time.Time lastSyncSuccess atomic.Value // time.Time @@ -42,6 +51,7 @@ type Syncer struct { lastSyncedAppCount atomic.Int64 lastSyncDetails atomic.Value // *SyncDetails statusMutex sync.RWMutex // Mutex for complex status updates + tryOnce atomic.Bool } // NewSyncer creates a new syncer with the given steps @@ -54,6 +64,7 @@ func NewSyncer(cache *CacheData, syncInterval time.Duration, settingsManager *se stopChan: make(chan struct{}), isRunning: atomic.Bool{}, // Initialize with false settingsManager: settingsManager, + tryOnce: atomic.Bool{}, } // Initialize atomic values s.lastSyncTime.Store(time.Time{}) @@ -62,24 +73,21 @@ func NewSyncer(cache *CacheData, syncInterval time.Duration, settingsManager *se s.currentStep.Store("") s.lastSyncDuration.Store(time.Duration(0)) s.currentSource.Store("") + s.lastKnownRemoteSourceIDs.Store("") + s.lastKnownUserIDs.Store("") return s } // AddStep adds a step to the syncer func (s *Syncer) AddStep(step syncerfn.SyncStep) { - if !s.mutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for AddStep, skipping") - return - } + s.mutex.Lock() defer s.mutex.Unlock() s.steps = append(s.steps, step) } // RemoveStep removes a step by index func (s *Syncer) RemoveStep(index int) error { - if !s.mutex.TryLock() { - return fmt.Errorf("failed to acquire lock for RemoveStep") - } + s.mutex.Lock() defer s.mutex.Unlock() if index < 0 || index >= len(s.steps) { @@ -92,10 +100,7 @@ func (s *Syncer) RemoveStep(index int) error { // GetSteps returns a copy of all steps func (s *Syncer) GetSteps() []syncerfn.SyncStep { - if !s.mutex.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetSteps, returning empty slice") - return make([]syncerfn.SyncStep, 0) - } + s.mutex.RLock() defer s.mutex.RUnlock() steps := make([]syncerfn.SyncStep, len(s.steps)) @@ -103,11 +108,15 @@ func (s *Syncer) GetSteps() []syncerfn.SyncStep { return steps } -// Start begins the synchronization process +// Start begins the synchronization 
process with its own sync loop func (s *Syncer) Start(ctx context.Context) error { - if !s.mutex.TryLock() { - return fmt.Errorf("failed to acquire lock for Start") - } + return s.StartWithOptions(ctx, true) +} + +// StartWithOptions starts the syncer with options. +// If enableSyncLoop is false, the periodic sync loop is not started (Pipeline handles scheduling). +func (s *Syncer) StartWithOptions(ctx context.Context, enableSyncLoop bool) error { + s.mutex.Lock() if s.isRunning.Load() { s.mutex.Unlock() return fmt.Errorf("syncer is already running") @@ -115,18 +124,154 @@ func (s *Syncer) Start(ctx context.Context) error { s.isRunning.Store(true) s.mutex.Unlock() - glog.V(2).Infof("Starting syncer with %d steps, sync interval: %v", len(s.steps), s.syncInterval) - - go s.syncLoop(ctx) + if enableSyncLoop { + glog.V(2).Infof("Starting syncer with %d steps, sync interval: %v", len(s.steps), s.syncInterval) + go s.syncLoop(ctx) // not use + } else { + glog.V(2).Infof("Starting syncer with %d steps (passive mode, Pipeline handles scheduling)", len(s.steps)) + } return nil } -// Stop stops the synchronization process -func (s *Syncer) Stop() { - if !s.mutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for Stop, skipping") +// SyncOnce executes one sync cycle if at least syncInterval has elapsed +// since the last execution, OR if the sync-relevant configuration has changed +// (e.g. a new source or user was added/removed), OR if a remote source's +// data hash has changed (lightweight probe). Called by Pipeline on every tick. 
+func (s *Syncer) SyncOnce(ctx context.Context) { + if !s.isRunning.Load() { return } + + flag := s.tryOnce.Load() + if flag { + // return + } + + configChanged, reason := s.hasSyncRelevantConfigChanged() + throttled := !s.lastSyncExecuted.IsZero() && time.Since(s.lastSyncExecuted) < s.syncInterval + + if !configChanged && throttled { + if s.hasAnyRemoteHashChanged(ctx) { + glog.V(2).Info("SyncOnce: remote data hash changed, forcing sync cycle") + } else { + glog.V(3).Infof("SyncOnce: skipping, last sync was %v ago (interval: %v)", + time.Since(s.lastSyncExecuted), s.syncInterval) + return + } + } + if configChanged { + glog.V(2).Infof("SyncOnce: %s, forcing sync cycle", reason) + } + s.lastSyncExecuted = time.Now() + if err := s.executeSyncCycle(ctx); err != nil { + glog.Errorf("SyncOnce: sync cycle failed: %v", err) + } + + s.tryOnce.Store(true) +} + +// hasAnyRemoteHashChanged does a lightweight HTTP probe to each remote source's +// hash endpoint and returns true if any source's remote hash differs from the +// locally cached Others.Hash. Errors are silently ignored (conservative: don't +// force sync on network failure). 
+func (s *Syncer) hasAnyRemoteHashChanged(ctx context.Context) bool { + config := s.settingsManager.GetMarketSources() + if config == nil { + return false + } + + endpoints := s.settingsManager.GetAPIEndpoints() + hashPath := "/api/v1/appstore/hash" + if endpoints != nil && endpoints.HashPath != "" { + hashPath = endpoints.HashPath + } + + version := getVersionForSync() + client := resty.New().SetTimeout(3 * time.Second) + + for _, src := range config.Sources { + if src.Type != "remote" { + continue + } + + hashURL := s.settingsManager.BuildAPIURL(src.BaseURL, hashPath) + if strings.HasPrefix(hashURL, "file://") { + continue + } + + resp, err := client.R().SetContext(ctx).SetQueryParam("version", version).Get(hashURL) + if err != nil || resp.StatusCode() != 200 { + continue + } + + var hr struct { + Hash string `json:"hash"` + } + if json.Unmarshal(resp.Body(), &hr) != nil || hr.Hash == "" { + continue + } + + localHash := "" + if cm := s.cacheManager.Load(); cm != nil { + localHash = cm.GetSourceOthersHash(src.ID) + } + + if hr.Hash != localHash { + glog.V(2).Infof("SyncOnce: hash changed for source %s (remote=%s, local=%s)", + src.ID, hr.Hash, localHash) + return true + } + } + + return false +} + +// hasSyncRelevantConfigChanged checks whether the remote source list or the +// user list has changed since the last sync cycle. Returns true with a +// human-readable reason when a change is detected. 
+func (s *Syncer) hasSyncRelevantConfigChanged() (changed bool, reason string) { + // Check remote sources + config := s.settingsManager.GetMarketSources() + if config != nil && len(config.Sources) > 0 { + var remoteIDs []string + for _, src := range config.Sources { + if src.Type == "remote" { + remoteIDs = append(remoteIDs, src.ID) + } + } + sort.Strings(remoteIDs) + currentKey := strings.Join(remoteIDs, ",") + + lastKnown, _ := s.lastKnownRemoteSourceIDs.Load().(string) + if currentKey != lastKnown { + s.lastKnownRemoteSourceIDs.Store(currentKey) + if lastKnown != "" { + return true, "remote source configuration changed" + } + } + } + + // Check user list + if cm := s.cacheManager.Load(); cm != nil { + userIDs := cm.GetUserIDs() + sort.Strings(userIDs) + currentKey := strings.Join(userIDs, ",") + + lastKnown, _ := s.lastKnownUserIDs.Load().(string) + if currentKey != lastKnown { + s.lastKnownUserIDs.Store(currentKey) + if lastKnown != "" { + return true, "user list changed" + } + } + } + + return false, "" +} + +// Stop stops the synchronization process +func (s *Syncer) Stop() { + s.mutex.Lock() defer s.mutex.Unlock() if !s.isRunning.Load() { @@ -146,11 +291,9 @@ func (s *Syncer) IsRunning() bool { // syncLoop runs the main synchronization loop func (s *Syncer) syncLoop(ctx context.Context) { defer func() { - // Use TryLock for cleanup to avoid blocking - if s.mutex.TryLock() { - s.isRunning.Store(false) - s.mutex.Unlock() - } + s.mutex.Lock() + s.isRunning.Store(false) + s.mutex.Unlock() glog.V(4).Info("Syncer stopped") }() @@ -164,7 +307,7 @@ func (s *Syncer) syncLoop(ctx context.Context) { return default: // Execute sync cycle - if err := s.executeSyncCycle(ctx); err != nil { + if err := s.executeSyncCycle(ctx); err != nil { // not use glog.Errorf("Sync cycle failed: %v", err) } @@ -292,10 +435,7 @@ func (s *Syncer) executeSyncCycle(ctx context.Context) error { // updateSyncSuccess updates status after a successful sync func (s *Syncer) 
updateSyncSuccess(duration time.Duration, startTime time.Time) { - if !s.statusMutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for updateSyncSuccess, skipping status update") - return - } + s.statusMutex.Lock() defer s.statusMutex.Unlock() s.lastSyncSuccess.Store(time.Now()) @@ -310,10 +450,7 @@ func (s *Syncer) updateSyncSuccess(duration time.Duration, startTime time.Time) // updateSyncFailure updates status after a failed sync func (s *Syncer) updateSyncFailure(err error, startTime time.Time) { - if !s.statusMutex.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for updateSyncFailure, skipping status update") - return - } + s.statusMutex.Lock() defer s.statusMutex.Unlock() duration := time.Since(startTime) @@ -466,43 +603,11 @@ func (s *Syncer) executeSyncCycleWithSource(ctx context.Context, source *setting sourceID := source.ID // Use market source name as source ID glog.V(3).Infof("Using source ID: %s for data storage", sourceID) - // Get all existing user IDs with minimal locking + // Get all existing user IDs, creating a system user if none exist var userIDs []string - // Use CacheManager if available, otherwise use direct cache access if cacheManager := s.cacheManager.Load(); cacheManager != nil { - // Use CacheManager's lock - if !cacheManager.mutex.TryRLock() { - glog.Warning("[TryRLock] Syncer: CacheManager read lock not available, skipping user ID collection") - return fmt.Errorf("read lock not available") - } - for userID := range s.cache.Users { - userIDs = append(userIDs, userID) - } - cacheManager.mutex.RUnlock() - - // If no users exist, create a system user as fallback - if len(userIDs) == 0 { - glog.V(3).Infof("[LOCK] cacheManager.mutex.TryLock() @syncer:createSystemUser Start") - if !cacheManager.mutex.TryLock() { - glog.Warning("[TryLock] Syncer: CacheManager write lock not available for system user creation, skipping") - return fmt.Errorf("write lock not available") - } - // Double-check after acquiring write lock 
- if len(s.cache.Users) == 0 { - systemUserID := "system" - s.cache.Users[systemUserID] = NewUserDataEx(systemUserID) // NewUserData() - userIDs = append(userIDs, systemUserID) - glog.V(3).Infof("No existing users found, created system user as fallback") - } else { - // Users were added by another goroutine - for userID := range s.cache.Users { - userIDs = append(userIDs, userID) - } - } - cacheManager.mutex.Unlock() - } + userIDs = cacheManager.GetOrCreateUserIDs("system") } else { - // Fallback to direct cache access without lock (not recommended) glog.V(3).Info("Warning: CacheManager not available, using direct cache access") for userID := range s.cache.Users { userIDs = append(userIDs, userID) @@ -531,18 +636,16 @@ func (s *Syncer) executeSyncCycleWithSource(ctx context.Context, source *setting // storeDataDirectly stores data directly to cache without going through CacheManager func (s *Syncer) storeDataDirectly(userID, sourceID string, completeData map[string]interface{}) { - // Use CacheManager's lock if available - if cacheManager := s.cacheManager.Load(); cacheManager != nil { - glog.V(3).Infof("[LOCK] cacheManager.mutex.TryLock() @syncer:storeDataDirectly Start") - if !cacheManager.mutex.TryLock() { - glog.Warning("[TryLock] Syncer: CacheManager write lock not available for data storage, skipping") - return - } - defer cacheManager.mutex.Unlock() - } else { - // Fallback: no lock protection (not recommended) + cacheManager := s.cacheManager.Load() + if cacheManager == nil { glog.V(3).Infof("Warning: CacheManager not available for storeDataDirectly") + return } + // TODO: refactor storeDataDirectly to use CacheManager write methods + // For now, use the internal mutex directly as this function contains + // complex parsing logic (~250 lines) that runs under the lock. 
+ cacheManager.mutex.Lock() + defer cacheManager.mutex.Unlock() userData := s.cache.Users[userID] @@ -810,29 +913,16 @@ func (s *Syncer) storeDataViaCacheManager(userIDs []string, sourceID string, com for _, userID := range userIDs { // Check if the source is local type - skip syncer operations for local sources if cacheManager := s.cacheManager.Load(); cacheManager != nil { - if !cacheManager.mutex.TryRLock() { - glog.Warningf("[TryRLock] Syncer.storeDataViaCacheManager: CacheManager read lock not available for user %s, source %s, skipping", userID, sourceID) + if cacheManager.IsLocalSource(userID, sourceID) { + glog.V(3).Infof("Skipping syncer CacheManager operation for local source: user=%s, source=%s", userID, sourceID) continue } - userData, userExists := s.cache.Users[userID] - if userExists { - sourceData, sourceExists := userData.Sources[sourceID] - if sourceExists { - sourceType := sourceData.Type - if sourceType == types.SourceDataTypeLocal { - glog.V(3).Infof("Skipping syncer CacheManager operation for local source: user=%s, source=%s", userID, sourceID) - cacheManager.mutex.RUnlock() - continue - } - } - } - cacheManager.mutex.RUnlock() } // Use CacheManager.SetAppData to trigger hydration notifications if available if cacheManager := s.cacheManager.Load(); cacheManager != nil { glog.V(3).Infof("Using CacheManager to store data for user: %s, source: %s", userID, sourceID) - err := cacheManager.SetAppData(userID, sourceID, AppInfoLatestPending, completeData) + err := cacheManager.SetAppData(userID, sourceID, AppInfoLatestPending, completeData,"Syncer") if err != nil { glog.Errorf("Failed to store data via CacheManager for user: %s, source: %s, error: %v", userID, sourceID, err) // Fall back to direct cache access @@ -891,12 +981,9 @@ func DefaultSyncerConfig() SyncerConfig { // SetCacheManager sets the cache manager for hydration notifications func (s *Syncer) SetCacheManager(cacheManager *CacheManager) { - if !s.mutex.TryLock() { - 
glog.Warning("[TryLock] Failed to acquire lock for SetCacheManager, skipping") - return - } + s.mutex.Lock() defer s.mutex.Unlock() - s.cacheManager.Store(cacheManager) // Use atomic.Store to set the pointer + s.cacheManager.Store(cacheManager) } // SyncDetails contains detailed information about a sync operation diff --git a/internal/v2/appinfo/syncerfn/data_fetch_step.go b/internal/v2/appinfo/syncerfn/data_fetch_step.go index 5230b94..ee6aa02 100644 --- a/internal/v2/appinfo/syncerfn/data_fetch_step.go +++ b/internal/v2/appinfo/syncerfn/data_fetch_step.go @@ -106,38 +106,19 @@ func (d *DataFetchStep) CanSkip(ctx context.Context, data *SyncContext) bool { // Check if we have existing data in cache for THIS specific source only hasExistingData := false - if data.Cache != nil { - // Use CacheManager's lock for unified lock strategy - if data.CacheManager != nil { - data.CacheManager.RLock() - for userID, userData := range data.Cache.Users { - // Only check data for the current market source - if sourceData, exists := userData.Sources[sourceID]; exists { - if len(sourceData.AppInfoLatestPending) > 0 || len(sourceData.AppInfoLatest) > 0 { - hasExistingData = true - glog.V(2).Infof("Found existing data for source:%s user:%s (pending:%d latest:%d)", - sourceID, userID, len(sourceData.AppInfoLatestPending), len(sourceData.AppInfoLatest)) - break - } - } - } - data.CacheManager.RUnlock() - } else { - // Fallback to SyncContext's mutex if CacheManager is not available - data.mutex.RLock() - for userID, userData := range data.Cache.Users { - // Only check data for the current market source - if sourceData, exists := userData.Sources[sourceID]; exists { - if len(sourceData.AppInfoLatestPending) > 0 || len(sourceData.AppInfoLatest) > 0 { - hasExistingData = true - glog.V(2).Infof("Found existing data for source:%s user:%s (pending:%d latest:%d)", - sourceID, userID, len(sourceData.AppInfoLatestPending), len(sourceData.AppInfoLatest)) - break - } + if data.CacheManager != nil 
{ + hasExistingData = data.CacheManager.HasSourceData(sourceID) + } else if data.Cache != nil { + data.mutex.RLock() + for _, userData := range data.Cache.Users { + if sourceData, exists := userData.Sources[sourceID]; exists { + if len(sourceData.AppInfoLatestPending) > 0 || len(sourceData.AppInfoLatest) > 0 { + hasExistingData = true + break } } - data.mutex.RUnlock() } + data.mutex.RUnlock() } // Skip only if hashes match AND we have existing data for THIS specific source @@ -657,64 +638,11 @@ func (d *DataFetchStep) updateOthersInCache(data *SyncContext, others *types.Oth // Get source ID from market source - use Name to match syncer.go behavior sourceID := data.MarketSource.ID - // Use CacheManager's lock for unified lock strategy if data.CacheManager != nil { - data.CacheManager.Lock() - defer data.CacheManager.Unlock() - } - - // Get all existing user IDs - var userIDs []string - for userID := range data.Cache.Users { - userIDs = append(userIDs, userID) - } - - // If no users exist, create a system user as fallback - if len(userIDs) == 0 { - systemUserID := "system" - data.Cache.Users[systemUserID] = types.NewUserData() - userIDs = append(userIDs, systemUserID) - glog.V(3).Infof("No existing users found, created system user as fallback") - } - - glog.V(3).Infof("Updating Others data for %d users: %v, sourceID: %s", len(userIDs), userIDs, sourceID) - - // Update Others for each user - for _, userID := range userIDs { - userData := data.Cache.Users[userID] - - // Ensure source data exists for this user - if userData.Sources == nil { - userData.Sources = make(map[string]*types.SourceData) - } - - if userData.Sources[sourceID] == nil { - userData.Sources[sourceID] = types.NewSourceData() - } - - sourceData := userData.Sources[sourceID] - - // Update Others in SourceData - sourceData.Others = others - - // Log details about the saved recommends data - if sourceData.Others != nil && len(sourceData.Others.Recommends) > 0 { - glog.V(3).Infof("DEBUG: Saved %d 
recommends to cache for user %s, source %s", - len(sourceData.Others.Recommends), userID, sourceID) - for i, rec := range sourceData.Others.Recommends { - glog.V(3).Infof("DEBUG: Saved recommend[%d] '%s', has Data: %v", - i, rec.Name, rec.Data != nil) - if rec.Data != nil { - glog.V(3).Infof("DEBUG: Saved recommend[%d] Data.Title count: %d, Data.Description count: %d", - i, len(rec.Data.Title), len(rec.Data.Description)) - } - } - } else { - glog.V(3).Infof("DEBUG: No recommends data saved to cache for user %s, source %s", userID, sourceID) - } - - glog.V(3).Infof("Updated Others data in cache for user %s, source %s", userID, sourceID) + data.CacheManager.UpdateSourceOthers(sourceID, others) + } else { + glog.Warning("CacheManager not available, cannot update Others in cache") } - glog.V(2).Infof("Successfully updated Others data for all %d users, source %s", len(userIDs), sourceID) + glog.V(2).Infof("Successfully updated Others data for source %s", sourceID) } diff --git a/internal/v2/appinfo/syncerfn/detail_fetch_step.go b/internal/v2/appinfo/syncerfn/detail_fetch_step.go index e7962e4..d3b8425 100644 --- a/internal/v2/appinfo/syncerfn/detail_fetch_step.go +++ b/internal/v2/appinfo/syncerfn/detail_fetch_step.go @@ -3,7 +3,6 @@ package syncerfn import ( "context" "fmt" - "reflect" "strings" "time" @@ -488,8 +487,9 @@ func (d *DetailFetchStep) fetchAppsBatch(ctx context.Context, appIDs []string, d glog.V(3).Info("Mutex lock released successfully") // Now remove apps from cache after releasing the main lock to avoid nested locks + var source = data.GetMarketSource() for _, appToRemove := range appsToRemove { - d.removeAppFromCache(appToRemove.appID, appToRemove.appInfoMap, data) + d.removeAppFromCache(appToRemove.appID, appToRemove.appInfoMap, data, source) } // Count successful and failed apps @@ -555,7 +555,7 @@ func (d *DetailFetchStep) fetchAppsBatch(ctx context.Context, appIDs []string, d } // removeAppFromCache removes an app from cache for all users -func (d 
*DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string]interface{}, data *SyncContext) { +func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string]interface{}, data *SyncContext, source *settings.MarketSource) { appName, ok := appInfoMap["name"].(string) if !ok || appName == "" { glog.V(3).Infof("Warning: Cannot remove app from cache - app name is empty for app: %s", appID) @@ -571,183 +571,17 @@ func (d *DetailFetchStep) removeAppFromCache(appID string, appInfoMap map[string glog.V(3).Infof("Starting to remove app %s %s from cache", appID, appName) - // Get app name for matching - when an app is suspended, remove ALL versions of that app - - // Get source ID from market source - source := data.GetMarketSource() - if source == nil { - glog.V(3).Infof("Warning: MarketSource is nil, cannot remove app %s %s from cache", appID, appName) - return - } // IMPORTANT: use MarketSource.ID as the key for Sources map (not Name) sourceID := source.ID - glog.V(2).Infof("Removing all versions of app %s (name: %s) from cache for source: %s (sourceID=%s)", appID, appName, source.Name, sourceID) + glog.V(2).Infof("Removing all versions of app %s(%s) from cache for source: %s [SUSPEND/REMOVE]", appID, appName, sourceID) if data.CacheManager == nil { glog.V(3).Infof("Warning: CacheManager is nil, cannot remove app from cache") return } - // Step 1: Use try read lock to find all data that needs to be removed - glog.V(2).Infof("Step 1: Attempting to acquire read lock to find data for removal") - if !data.CacheManager.TryRLock() { - glog.Warningf("[TryRLock] Warning: Read lock not available for app removal, skipping: %s %s", appID, appName) - return - } - - // Collect all data that needs to be removed - type RemovalData struct { - userID string - sourceID string - newLatestList []*types.AppInfoLatestData - newPendingList []*types.AppInfoLatestPendingData - originalLatestCount int - originalPendingCount int - } - - var removals []RemovalData - - 
glog.V(3).Infof("Processing %d users for app removal (read phase)", len(data.Cache.Users)) - - for userID, userData := range data.Cache.Users { - sourceData, sourceExists := userData.Sources[sourceID] - if !sourceExists { - continue - } - - // Create new lists without the target app (all versions) - var newLatestList []*types.AppInfoLatestData - var newPendingList []*types.AppInfoLatestPendingData - - // Filter latest list - remove ALL versions of the app by name - for _, latestApp := range sourceData.AppInfoLatest { - if latestApp == nil || latestApp.RawData == nil { - newLatestList = append(newLatestList, latestApp) - continue - } - // Remove all versions of the app with matching name - if latestApp.RawData.Name != appName { - newLatestList = append(newLatestList, latestApp) - } else { - glog.V(3).Infof("Removing app version %s (name: %s) from AppInfoLatest", latestApp.RawData.Version, appName) - } - } - - // Filter pending list - remove ALL versions of the app by name - for _, pendingApp := range sourceData.AppInfoLatestPending { - if pendingApp == nil || pendingApp.RawData == nil { - newPendingList = append(newPendingList, pendingApp) - continue - } - // Remove all versions of the app with matching name - if pendingApp.RawData.Name != appName { - newPendingList = append(newPendingList, pendingApp) - } else { - glog.V(3).Infof("Removing pending app version %s (name: %s) from AppInfoLatestPending", pendingApp.RawData.Version, appName) - } - } - - // Only add to removals if there were actually items to remove - if len(newLatestList) != len(sourceData.AppInfoLatest) || len(newPendingList) != len(sourceData.AppInfoLatestPending) { - removals = append(removals, RemovalData{ - userID: userID, - sourceID: sourceID, - newLatestList: newLatestList, - newPendingList: newPendingList, - originalLatestCount: len(sourceData.AppInfoLatest), - originalPendingCount: len(sourceData.AppInfoLatestPending), - }) - } - } - - if len(removals) > 0 { - glog.V(2).Infof("Step 1 completed: 
Found %d users with data to remove", len(removals)) - } - - // Release read lock before acquiring write lock (must release manually since we need to acquire write lock) - data.CacheManager.RUnlock() - - // Step 2: Use try write lock to quickly update the data - if len(removals) == 0 { - glog.V(3).Infof("No data found to remove for app: %s", appID) - return - } - - glog.V(2).Info("Step 2: Attempting to acquire write lock to update data") - if !data.CacheManager.TryLock() { - glog.Warningf("[TryLock] Warning: Write lock not available for app removal, skipping: %s %s", appID, appName) - return - } - defer data.CacheManager.Unlock() - - // Collect sync requests to trigger after releasing the lock - type SyncReq struct { - userID string - sourceID string - } - var syncReqs []SyncReq - - // Quickly update all the data by replacing array pointers - for _, removal := range removals { - userData := data.Cache.Users[removal.userID] - sourceData := userData.Sources[removal.sourceID] - - // Replace array pointers (atomic operation) - sourceData.AppInfoLatest = removal.newLatestList - sourceData.AppInfoLatestPending = removal.newPendingList - - glog.V(3).Infof("Updated user: %s, source: %s, app: %s (latest: %d->%d, pending: %d->%d)", - removal.userID, removal.sourceID, appName, - removal.originalLatestCount, len(removal.newLatestList), - removal.originalPendingCount, len(removal.newPendingList)) - - // Collect sync request - syncReqs = append(syncReqs, SyncReq{ - userID: removal.userID, - sourceID: removal.sourceID, - }) - } - - glog.V(3).Infof("App removal from cache completed for app: %s %s", appID, appName) - - // Trigger sync to Redis for all affected users and sources after releasing the lock - // Use reflection to access the private requestSync method - // We do this in a goroutine to avoid blocking and to ensure the lock is released first - go func() { - // Wait a bit to ensure the lock is released - time.Sleep(10 * time.Millisecond) - - cmValue := 
reflect.ValueOf(data.CacheManager) - if cmValue.Kind() == reflect.Ptr { - cmValue = cmValue.Elem() - } - - requestSyncMethod := cmValue.MethodByName("requestSync") - if !requestSyncMethod.IsValid() { - glog.V(3).Infof("Warning: Cannot find requestSync method in CacheManager, sync to Redis will be handled by StoreCompleteDataToPending") - return - } - - // SyncSource = 1 (based on iota: SyncUser=0, SyncSource=1) - const SyncSource = 1 - - for _, syncReq := range syncReqs { - // Create SyncRequest struct value - // SyncRequest has: UserID string, SourceID string, Type SyncType (int) - syncRequestValue := reflect.New(reflect.TypeOf(struct { - UserID string - SourceID string - Type int - }{})).Elem() - syncRequestValue.Field(0).SetString(syncReq.userID) - syncRequestValue.Field(1).SetString(syncReq.sourceID) - syncRequestValue.Field(2).SetInt(SyncSource) - - // Call requestSync method - requestSyncMethod.Call([]reflect.Value{syncRequestValue}) - glog.V(3).Infof("Triggered sync to Redis for user: %s, source: %s, app: %s", syncReq.userID, syncReq.sourceID, appName) - } - }() + affected := data.CacheManager.RemoveAppFromAllSources(appName, sourceID) + glog.V(3).Infof("App removal from cache completed for app: %s %s, affected %d users", appID, appName, affected) } // cleanupSuspendedAppsFromLatestData checks all apps in LatestData.Data.Apps for suspend/remove labels @@ -758,11 +592,14 @@ func (d *DetailFetchStep) cleanupSuspendedAppsFromLatestData(data *SyncContext) } sourceID := "" - if marketSource := data.GetMarketSource(); marketSource != nil { - // IMPORTANT: use MarketSource.ID as the key for Sources map (not Name) - sourceID = marketSource.ID + marketSource := data.GetMarketSource() + if marketSource == nil { + glog.Error("[DetailFetchStep] MarketSource not found") + return } + sourceID = marketSource.ID + // Collect apps to remove appsToRemove := make([]struct { appID string @@ -847,7 +684,7 @@ func (d *DetailFetchStep) cleanupSuspendedAppsFromLatestData(data 
*SyncContext) } } if appInfoMapForRemoval != nil { - d.removeAppFromCache(appIDForRemoval, appInfoMapForRemoval, data) + d.removeAppFromCache(appIDForRemoval, appInfoMapForRemoval, data, marketSource) } } } @@ -1002,34 +839,8 @@ func (d *DetailFetchStep) preserveFieldsForDelistedApp(originalMap, detailMap ma // isAppInstalled determines whether the given app is currently installed for the active source. func (d *DetailFetchStep) isAppInstalled(appName, sourceID string, data *SyncContext) bool { - if appName == "" || sourceID == "" || data == nil || data.Cache == nil || data.CacheManager == nil { - return false - } - - // English comment: use try read lock to safely inspect installation states - if !data.CacheManager.TryRLock() { - glog.Warningf("[TryRLock] Warning: Read lock not available for isAppInstalled check, returning false, source: %s, name: %s", sourceID, appName) + if appName == "" || sourceID == "" || data == nil || data.CacheManager == nil { return false } - defer data.CacheManager.RUnlock() - - for _, userData := range data.Cache.Users { - if userData == nil { - continue - } - sourceData, ok := userData.Sources[sourceID] - if !ok || sourceData == nil { - continue - } - for _, appState := range sourceData.AppStateLatest { - if appState == nil { - continue - } - if appState.Status.Name == appName && appState.Status.State != "uninstalled" { - return true - } - } - } - - return false + return data.CacheManager.IsAppInstalled(sourceID, appName) } diff --git a/internal/v2/appinfo/syncerfn/hash_comparison_step.go b/internal/v2/appinfo/syncerfn/hash_comparison_step.go index 8a41b78..5b98268 100644 --- a/internal/v2/appinfo/syncerfn/hash_comparison_step.go +++ b/internal/v2/appinfo/syncerfn/hash_comparison_step.go @@ -87,11 +87,16 @@ func (h *HashComparisonStep) Execute(ctx context.Context, data *SyncContext) err data.RemoteHash = hashResponse.Hash - // Calculate local hash with proper locking + // Calculate local hash if data.CacheManager != nil { - 
data.CacheManager.RLock() - data.LocalHash = h.calculateLocalHash(data.Cache, data.GetMarketSource()) - data.CacheManager.RUnlock() + data.LocalHash = data.CacheManager.GetSourceOthersHash(marketSource.ID) + if data.LocalHash == "" { + if data.Cache == nil || len(data.Cache.Users) == 0 { + data.LocalHash = "empty_cache_no_users" + } else { + data.LocalHash = "no_source_hash" + } + } } // Compare hashes and set result diff --git a/internal/v2/appinfo/syncerfn/step_interface.go b/internal/v2/appinfo/syncerfn/step_interface.go index e7e2945..bbf9b1d 100644 --- a/internal/v2/appinfo/syncerfn/step_interface.go +++ b/internal/v2/appinfo/syncerfn/step_interface.go @@ -68,7 +68,7 @@ func NewSyncContextWithManager(cache *types.CacheData, cacheManager types.CacheM Client: resty.New(), Cache: cache, CacheManager: cacheManager, - LatestData: &AppStoreInfoResponse{}, + LatestData: nil, DetailedApps: make(map[string]interface{}), AppIDs: make([]string, 0), Errors: make([]error, 0), diff --git a/internal/v2/runtime/store.go b/internal/v2/runtime/store.go index e0687d5..e913070 100644 --- a/internal/v2/runtime/store.go +++ b/internal/v2/runtime/store.go @@ -4,8 +4,6 @@ import ( "fmt" "sync" "time" - - "github.com/golang/glog" ) // StateStore manages the current runtime state in memory @@ -30,11 +28,8 @@ func NewStateStore() *StateStore { // UpdateAppState updates or creates an app flow state func (s *StateStore) UpdateAppState(state *AppFlowState) { - if !s.mu.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for UpdateAppState, skipping update for app: %s", state.AppName) - return - } - defer s.mu.Unlock() + s.mu.Lock() + defer s.mu.Unlock() key := s.getAppStateKey(state.UserID, state.SourceID, state.AppName) state.LastUpdate = time.Now() @@ -44,10 +39,7 @@ func (s *StateStore) UpdateAppState(state *AppFlowState) { // GetAppState retrieves an app flow state func (s *StateStore) GetAppState(userID, sourceID, appName string) (*AppFlowState, bool) { - if
!s.mu.TryRLock() { - glog.Warningf("[TryRLock] Failed to acquire read lock for GetAppState, returning empty for app: %s", appName) - return nil, false - } + s.mu.RLock() defer s.mu.RUnlock() key := s.getAppStateKey(userID, sourceID, appName) @@ -57,10 +49,7 @@ func (s *StateStore) GetAppState(userID, sourceID, appName string) (*AppFlowStat // GetAllAppStates returns all app states func (s *StateStore) GetAllAppStates() map[string]*AppFlowState { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetAllAppStates, returning empty map") - return make(map[string]*AppFlowState) - } + s.mu.RLock() defer s.mu.RUnlock() result := make(map[string]*AppFlowState) @@ -72,10 +61,7 @@ func (s *StateStore) GetAllAppStates() map[string]*AppFlowState { // UpdateTask updates or creates a task state func (s *StateStore) UpdateTask(task *TaskState) { - if !s.mu.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for UpdateTask, skipping update for task: %s, opId: %s, app: %s", task.TaskID, task.OpID, task.AppName) - return - } + s.mu.Lock() defer s.mu.Unlock() s.tasks[task.TaskID] = task @@ -84,10 +70,7 @@ func (s *StateStore) UpdateTask(task *TaskState) { // GetTask retrieves a task state func (s *StateStore) GetTask(taskID string) (*TaskState, bool) { - if !s.mu.TryRLock() { - glog.Warningf("[TryRLock] Failed to acquire read lock for GetTask, returning empty for task: %s", taskID) - return nil, false - } + s.mu.RLock() defer s.mu.RUnlock() task, ok := s.tasks[taskID] @@ -96,10 +79,7 @@ func (s *StateStore) GetTask(taskID string) (*TaskState, bool) { // GetAllTasks returns all tasks func (s *StateStore) GetAllTasks() map[string]*TaskState { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetAllTasks, returning empty map") - return make(map[string]*TaskState) - } + s.mu.RLock() defer s.mu.RUnlock() result := make(map[string]*TaskState) @@ -111,10 +91,7 @@ func (s *StateStore) GetAllTasks() 
map[string]*TaskState { // RemoveTask removes a completed/failed/canceled task after some time func (s *StateStore) RemoveTask(taskID string) { - if !s.mu.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for RemoveTask, skipping removal for task: %s", taskID) - return - } + s.mu.Lock() defer s.mu.Unlock() delete(s.tasks, taskID) @@ -123,10 +100,7 @@ func (s *StateStore) RemoveTask(taskID string) { // UpdateComponent updates or creates a component status func (s *StateStore) UpdateComponent(component *ComponentStatus) { - if !s.mu.TryLock() { - glog.Warningf("[TryLock] Failed to acquire lock for UpdateComponent, skipping update for component: %s", component.Name) - return - } + s.mu.Lock() defer s.mu.Unlock() component.LastCheck = time.Now() @@ -136,10 +110,7 @@ func (s *StateStore) UpdateComponent(component *ComponentStatus) { // GetComponent retrieves a component status func (s *StateStore) GetComponent(name string) (*ComponentStatus, bool) { - if !s.mu.TryRLock() { - glog.Warningf("[TryRLock] Failed to acquire read lock for GetComponent, returning empty for component: %s", name) - return nil, false - } + s.mu.RLock() defer s.mu.RUnlock() component, ok := s.components[name] @@ -148,10 +119,7 @@ func (s *StateStore) GetComponent(name string) (*ComponentStatus, bool) { // GetAllComponents returns all component statuses func (s *StateStore) GetAllComponents() map[string]*ComponentStatus { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetAllComponents, returning empty map") - return make(map[string]*ComponentStatus) - } + s.mu.RLock() defer s.mu.RUnlock() result := make(map[string]*ComponentStatus) @@ -163,16 +131,7 @@ func (s *StateStore) GetAllComponents() map[string]*ComponentStatus { // GetSnapshot creates a complete snapshot of current state func (s *StateStore) GetSnapshot() *RuntimeSnapshot { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetSnapshot, returning empty 
snapshot") - return &RuntimeSnapshot{ - Timestamp: time.Now(), - AppStates: make(map[string]*AppFlowState), - Tasks: make(map[string]*TaskState), - Components: make(map[string]*ComponentStatus), - Summary: &RuntimeSummary{}, - } - } + s.mu.RLock() defer s.mu.RUnlock() snapshot := &RuntimeSnapshot{ @@ -254,10 +213,7 @@ func (s *StateStore) getAppStateKey(userID, sourceID, appName string) string { // UpdateChartRepoStatus updates chart repo status func (s *StateStore) UpdateChartRepoStatus(status *ChartRepoStatus) { - if !s.mu.TryLock() { - glog.Warning("[TryLock] Failed to acquire lock for UpdateChartRepoStatus, skipping update") - return - } + s.mu.Lock() defer s.mu.Unlock() if status != nil { @@ -269,20 +225,16 @@ func (s *StateStore) UpdateChartRepoStatus(status *ChartRepoStatus) { // GetChartRepoStatus retrieves chart repo status func (s *StateStore) GetChartRepoStatus() *ChartRepoStatus { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetChartRepoStatus, returning nil") - return nil - } + s.mu.RLock() defer s.mu.RUnlock() + return s.chartRepo } // GetLastUpdate returns the last update time func (s *StateStore) GetLastUpdate() time.Time { - if !s.mu.TryRLock() { - glog.Warning("[TryRLock] Failed to acquire read lock for GetLastUpdate, returning zero time") - return time.Time{} - } + s.mu.RLock() defer s.mu.RUnlock() + return s.lastUpdate } diff --git a/internal/v2/task/app_cancel.go b/internal/v2/task/app_cancel.go index dac58e2..7202bec 100644 --- a/internal/v2/task/app_cancel.go +++ b/internal/v2/task/app_cancel.go @@ -59,7 +59,7 @@ func (tm *TaskModule) AppCancel(task *Task) (string, error) { } // Send HTTP request and get response - glog.Infof("Sending HTTP request for app cancel: task=%s, app_name=%s", task.ID, appName) + glog.Infof("[APP] Sending HTTP request for app cancel: task=%s, app_name=%s", task.ID, appName) response, err := sendHttpRequest(http.MethodPost, urlStr, headers, nil) if err != nil { glog.Errorf("HTTP 
request failed for app cancel: task=%s, error=%v", task.ID, err) @@ -78,7 +78,7 @@ func (tm *TaskModule) AppCancel(task *Task) (string, error) { return string(errorJSON), err } - glog.Infof("HTTP request completed successfully for app cancel: task=%s, response_length=%d", task.ID, len(response)) + glog.Infof("[APP] HTTP request completed successfully for app cancel: task=%s, response_length=%d", task.ID, len(response)) // Parse response to extract opID if cancel is successful var responseData map[string]interface{} diff --git a/internal/v2/task/app_clone.go b/internal/v2/task/app_clone.go index 50d6e49..a8c2634 100644 --- a/internal/v2/task/app_clone.go +++ b/internal/v2/task/app_clone.go @@ -7,8 +7,6 @@ import ( "os" "strings" - "market/internal/v2/settings" - "github.com/golang/glog" ) @@ -144,13 +142,9 @@ func (tm *TaskModule) AppClone(task *Task) (string, error) { } // Get VC from purchase receipt using rawAppName and inject into environment variables - var settingsManager *settings.SettingsManager - if tm.mu.TryRLock() { - settingsManager = tm.settingsManager - tm.mu.RUnlock() - } else { - glog.Warningf("Failed to acquire read lock for settingsManager, skipping VC injection for task: %s", task.ID) - } + tm.mu.RLock() + settingsManager := tm.settingsManager + tm.mu.RUnlock() if settingsManager != nil { vc := getVCForClone(settingsManager, user, rawAppName, task.Metadata) @@ -218,7 +212,7 @@ func (tm *TaskModule) AppClone(task *Task) (string, error) { } // Send HTTP request and get response - glog.Infof("Sending HTTP request for app clone: task=%s", task.ID) + glog.Infof("[APP] Sending HTTP request for app clone: task=%s", task.ID) response, err := sendHttpRequest(http.MethodPost, urlStr, headers, strings.NewReader(string(ms))) if err != nil { glog.Errorf("HTTP request failed for app clone: task=%s, error=%v", task.ID, err) diff --git a/internal/v2/task/app_install.go b/internal/v2/task/app_install.go index fca4e2b..caa51d3 100644 --- 
a/internal/v2/task/app_install.go +++ b/internal/v2/task/app_install.go @@ -9,8 +9,6 @@ import ( "os" "strings" - "market/internal/v2/settings" - "github.com/golang/glog" ) @@ -128,13 +126,9 @@ func (tm *TaskModule) AppInstall(task *Task) (string, error) { } // Get VC from purchase receipt and inject into environment variables - var settingsManager *settings.SettingsManager - if tm.mu.TryRLock() { - settingsManager = tm.settingsManager - tm.mu.RUnlock() - } else { - glog.Warningf("[TryRLock] Failed to acquire read lock for settingsManager, skipping VC injection for task: %s, user: %s, app: %s", task.ID, task.User, task.AppName) - } + tm.mu.RLock() + settingsManager := tm.settingsManager + tm.mu.RUnlock() if settingsManager != nil { vcAppID := appName @@ -202,7 +196,7 @@ func (tm *TaskModule) AppInstall(task *Task) (string, error) { } // Send HTTP request and get response - glog.V(2).Infof("Sending HTTP request for app installation: task=%s, data: %s", task.ID, string(ms)) + glog.Infof("[APP] Sending HTTP request for app installation: task=%s, data: %s", task.ID, string(ms)) response, err := sendHttpRequest(http.MethodPost, urlStr, headers, strings.NewReader(string(ms))) if err != nil { glog.Errorf("HTTP request failed for app installation: task=%s, error=%v", task.ID, err) @@ -222,7 +216,7 @@ func (tm *TaskModule) AppInstall(task *Task) (string, error) { return string(errorJSON), err } - glog.V(2).Infof("HTTP request completed successfully for app installation: task=%s, response_length=%d, resp=%s", task.ID, len(response), response) + glog.Infof("[APP] HTTP request completed successfully for app installation: task=%s, response_length=%d, resp=%s", task.ID, len(response), response) // Parse response to extract opID if installation is successful var responseData map[string]interface{} diff --git a/internal/v2/task/app_uninstall.go b/internal/v2/task/app_uninstall.go index a79a9af..c663e72 100644 --- a/internal/v2/task/app_uninstall.go +++ 
b/internal/v2/task/app_uninstall.go @@ -71,7 +71,7 @@ func (tm *TaskModule) AppUninstall(task *Task) (string, error) { } // Send HTTP request and get response - glog.Infof("Sending HTTP request for app uninstallation: task=%s, all=%v", task.ID, all) + glog.Infof("[APP] Sending HTTP request for app uninstallation: task=%s, all=%v", task.ID, all) // Create request body with all parameter requestBody := map[string]interface{}{ @@ -99,7 +99,7 @@ func (tm *TaskModule) AppUninstall(task *Task) (string, error) { return string(errorJSON), err } - glog.Infof("HTTP request completed successfully for app uninstallation: task=%s, response_length=%d", task.ID, len(response)) + glog.Infof("[APP] HTTP request completed successfully for app uninstallation: task=%s, response_length=%d", task.ID, len(response)) // Parse response to extract opID if uninstallation is successful var responseData map[string]interface{} diff --git a/internal/v2/task/app_upgrade.go b/internal/v2/task/app_upgrade.go index d31e7ff..9b8fff3 100644 --- a/internal/v2/task/app_upgrade.go +++ b/internal/v2/task/app_upgrade.go @@ -120,7 +120,7 @@ func (tm *TaskModule) AppUpgrade(task *Task) (string, error) { } // Send HTTP request and get response - glog.Infof("Sending HTTP request for app upgrade: task=%s, version=%s", task.ID, version) + glog.Infof("[APP] Sending HTTP request for app upgrade: task=%s, version=%s", task.ID, version) response, err := sendHttpRequest(http.MethodPost, urlStr, headers, strings.NewReader(string(ms))) if err != nil { glog.Errorf("HTTP request failed for app upgrade: task=%s, error=%v", task.ID, err) @@ -140,7 +140,7 @@ func (tm *TaskModule) AppUpgrade(task *Task) (string, error) { return string(errorJSON), err } - glog.Infof("HTTP request completed successfully for app upgrade: task=%s, response_length=%d", task.ID, len(response)) + glog.Infof("[APP] HTTP request completed successfully for app upgrade: task=%s, response_length=%d", task.ID, len(response)) // Create success result 
successResult := map[string]interface{}{ diff --git a/internal/v2/task/taskmodule.go b/internal/v2/task/taskmodule.go index fbb0f74..6ae7469 100644 --- a/internal/v2/task/taskmodule.go +++ b/internal/v2/task/taskmodule.go @@ -122,108 +122,27 @@ func NewTaskModule() (*TaskModule, error) { // SetHistoryModule sets the history module for recording task events func (tm *TaskModule) SetHistoryModule(historyModule *history.HistoryModule) { - // Retry mechanism for acquiring lock (max 3 attempts with 10ms delay) - maxRetries := 3 - retryDelay := 10 * time.Millisecond - - var lockAcquired bool - for attempt := 0; attempt < maxRetries; attempt++ { - if tm.mu.TryLock() { - lockAcquired = true - break - } - - if attempt < maxRetries-1 { - time.Sleep(retryDelay) - continue - } - } - - if !lockAcquired { - glog.Warningf("[%s] Failed to acquire lock for SetHistoryModule after %d attempts", tm.instanceID, maxRetries) - return - } + tm.mu.Lock() defer tm.mu.Unlock() tm.historyModule = historyModule } // SetDataSender sets the data sender for sending system updates func (tm *TaskModule) SetDataSender(dataSender DataSenderInterface) { - // Retry mechanism for acquiring lock (max 3 attempts with 10ms delay) - maxRetries := 3 - retryDelay := 10 * time.Millisecond - - var lockAcquired bool - for attempt := 0; attempt < maxRetries; attempt++ { - if tm.mu.TryLock() { - lockAcquired = true - break - } - - if attempt < maxRetries-1 { - time.Sleep(retryDelay) - continue - } - } - - if !lockAcquired { - glog.Warningf("[%s] Failed to acquire lock for SetDataSender after %d attempts", tm.instanceID, maxRetries) - return - } + tm.mu.Lock() defer tm.mu.Unlock() tm.dataSender = dataSender } // SetSettingsManager sets the settings manager for accessing Redis func (tm *TaskModule) SetSettingsManager(settingsManager *settings.SettingsManager) { - // Retry mechanism for acquiring lock (max 3 attempts with 10ms delay) - maxRetries := 3 - retryDelay := 10 * time.Millisecond - - var lockAcquired bool - 
for attempt := 0; attempt < maxRetries; attempt++ { - if tm.mu.TryLock() { - lockAcquired = true - break - } - - if attempt < maxRetries-1 { - time.Sleep(retryDelay) - continue - } - } - - if !lockAcquired { - glog.Warningf("[%s] Failed to acquire lock for SetSettingsManager after %d attempts", tm.instanceID, maxRetries) - return - } + tm.mu.Lock() defer tm.mu.Unlock() tm.settingsManager = settingsManager } // AddTask adds a new task to the pending queue func (tm *TaskModule) AddTask(taskType TaskType, appName string, user string, metadata map[string]interface{}, callback TaskCallback) (*Task, error) { - // Retry mechanism for acquiring lock (max 3 attempts with 10ms delay) - maxRetries := 3 - retryDelay := 10 * time.Millisecond - - var lockAcquired bool - for attempt := 0; attempt < maxRetries; attempt++ { - if tm.mu.TryLock() { - lockAcquired = true - break - } - - if attempt < maxRetries-1 { - time.Sleep(retryDelay) - continue - } - } - - if !lockAcquired { - return nil, fmt.Errorf("failed to acquire lock for AddTask after %d attempts", maxRetries) - } - if metadata == nil { metadata = make(map[string]interface{}) } @@ -239,19 +158,16 @@ func (tm *TaskModule) AddTask(taskType TaskType, appName string, user string, me Callback: callback, } - // Add to pending queue first (fast memory operation) + tm.mu.Lock() tm.pendingTasks = append(tm.pendingTasks, task) tm.mu.Unlock() - // Persist task outside of lock (database operation may be slow) if err := tm.persistTask(task); err != nil { glog.Errorf("[%s] Failed to persist task %s: %v", tm.instanceID, task.ID, err) - // Don't return error - task is already in memory queue, will be persisted later } glog.V(2).Infof("[%s] Task added: ID=%s, Type=%d, AppName=%s, User=%s, HasCallback=%v", tm.instanceID, task.ID, task.Type, task.AppName, user, callback != nil) - // Record task addition in history (outside of lock) tm.recordTaskHistory(task, user) return task, nil @@ -466,42 +382,126 @@ func (tm *TaskModule) taskExecutor() { 
} } -// executeNextTask gets the earliest pending task and executes it -func (tm *TaskModule) executeNextTask() { - if !tm.mu.TryLock() { - return +// dequeueNextPendingTask atomically moves the next pending task to running state. +// Returns nil if no pending task is available. +func (tm *TaskModule) dequeueNextPendingTask() *Task { + tm.mu.Lock() + defer tm.mu.Unlock() + + if len(tm.pendingTasks) == 0 { + return nil } - var task *Task - if len(tm.pendingTasks) > 0 { - // Get the first task (FIFO) - task = tm.pendingTasks[0] - tm.pendingTasks = tm.pendingTasks[1:] + task := tm.pendingTasks[0] + tm.pendingTasks = tm.pendingTasks[1:] - // Move to running tasks - task.Status = Running - now := time.Now() - task.StartedAt = &now - tm.runningTasks[task.ID] = task - } + task.Status = Running + now := time.Now() + task.StartedAt = &now + tm.runningTasks[task.ID] = task + return task +} + +// removeRunningTask atomically removes a task from the running tasks map. +func (tm *TaskModule) removeRunningTask(taskID string) { + tm.mu.Lock() + delete(tm.runningTasks, taskID) tm.mu.Unlock() +} + +// completeRunningTask atomically finds a running task by opID or appName+user, +// updates its status, and removes it from the running tasks map. +// Returns the task for I/O operations outside the lock, or an error if not found. 
+func (tm *TaskModule) completeRunningTask( + opID, appName, user string, + taskTypes []TaskType, + status TaskStatus, + result, errorMsg string, +) (*Task, error) { + tm.mu.Lock() + defer tm.mu.Unlock() + + var target *Task + + if opID != "" { + for _, t := range tm.runningTasks { + if t.OpID == opID && matchTaskTypes(t.Type, taskTypes) { + target = t + break + } + } + } + + if target == nil && appName != "" && user != "" { + for _, t := range tm.runningTasks { + if matchTaskTypes(t.Type, taskTypes) && t.AppName == appName && t.User == user { + target = t + break + } + } + } + if target == nil { + return nil, fmt.Errorf("no matching running task found (opID=%s, app=%s, user=%s)", opID, appName, user) + } + + target.Status = status + now := time.Now() + target.CompletedAt = &now + target.Result = result + if errorMsg != "" { + target.ErrorMsg = errorMsg + } + + delete(tm.runningTasks, target.ID) + return target, nil +} + +func matchTaskTypes(t TaskType, types []TaskType) bool { + for _, tt := range types { + if t == tt { + return true + } + } + return false +} + +// executeNextTask gets the earliest pending task and executes it +func (tm *TaskModule) executeNextTask() { + task := tm.dequeueNextPendingTask() if task == nil { return } - // Persist task state outside of lock (database operation may be slow) if err := tm.persistTask(task); err != nil { glog.Errorf("[%s] Failed to persist running task state for %s: %v", tm.instanceID, task.ID, err) } glog.V(2).Infof("[%s] Executing task: ID=%s, Type=%d, AppName=%s", tm.instanceID, task.ID, task.Type, task.AppName) - // Execute the task outside of lock (may take minutes) tm.executeTask(task) } +// handleTaskFailure handles the common failure path for task execution +func (tm *TaskModule) handleTaskFailure(task *Task, result string, err error, failureDesc string) { + glog.Errorf("[TASK] %s for task: %s, name: %s, error: %v", failureDesc, task.ID, task.AppName, err) + task.Result = result + task.Status = Failed + 
task.ErrorMsg = fmt.Sprintf("%s: %v, task: %s", failureDesc, err, task.ID) + now := time.Now() + task.CompletedAt = &now + + if task.Callback != nil { + task.Callback(result, err) + } + + tm.removeRunningTask(task.ID) + tm.finalizeTaskPersistence(task) + tm.sendTaskFinishedUpdate(task, "failed") + tm.recordTaskResult(task, result, err) +} + // executeTask executes the actual task logic func (tm *TaskModule) executeTask(task *Task) { var result string @@ -510,264 +510,59 @@ func (tm *TaskModule) executeTask(task *Task) { glog.V(2).Infof("[TASK] Starting task execution: ID=%s, Type=%s, App=%s, User=%s", task.ID, getTaskTypeString(task.Type), task.AppName, task.User) - // Send task execution system update tm.sendTaskExecutionUpdate(task) switch task.Type { case InstallApp: - // Execute app installation glog.V(2).Infof("[TASK] Executing app installation for task: %s", task.ID) result, err = tm.AppInstall(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App installation failed for task: %s, name: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Installation failed: %v, task: %s", err, task.ID) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("Removed failed task from running tasks (retry): ID=%s", 
taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, "failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Installation failed") return } glog.V(2).Infof("[TASK] App installation completed successfully for task: %s", task.ID) case UninstallApp: - // Execute app uninstallation glog.V(2).Infof("[TASK] Executing app uninstallation for task: %s", task.ID) result, err = tm.AppUninstall(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App uninstallation failed for task: %s, name: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Uninstallation failed: %v", err) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, 
"failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Uninstallation failed") return } glog.V(2).Infof("[TASK] App uninstallation completed successfully for task: %s", task.ID) case CancelAppInstall: - // Execute app cancel - cancel running install tasks glog.V(2).Infof("[TASK] Executing app cancel for task: %s", task.ID) - - // First, call AppCancel to send cancel request to app service result, err = tm.AppCancel(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App cancel failed for task: %s, name: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Cancel failed: %v", err) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, "failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Cancel failed") return } - // Then, call InstallTaskCanceled to mark the task as canceled in our system - 
err = tm.InstallTaskCanceled(task.AppName, "", "", task.User) - if err != nil { - glog.Errorf("[TASK] InstallTaskCanceled failed for task: %s, app: %s, error: %v", task.ID, task.AppName, err) - // Don't fail the entire operation if InstallTaskCanceled fails - // Just log the error and continue - glog.Errorf("[TASK] Warning: InstallTaskCanceled failed but AppCancel succeeded for task: %s", task.ID) + if cancelErr := tm.InstallTaskCanceled(task.AppName, "", "", task.User); cancelErr != nil { + glog.Errorf("[TASK] InstallTaskCanceled failed for task: %s, app: %s, error: %v", task.ID, task.AppName, cancelErr) } - - glog.V(2).Infof("App cancel completed successfully for task: %s, app: %s", task.ID, task.AppName) + glog.V(2).Infof("[TASK] App cancel completed successfully for task: %s, app: %s", task.ID, task.AppName) case UpgradeApp: - // Execute app upgrade glog.V(2).Infof("[TASK] Executing app upgrade for task: %s", task.ID) result, err = tm.AppUpgrade(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App upgrade failed for task: %s, app: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Upgrade failed: %v", err) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from 
running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, "failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Upgrade failed") return } glog.V(2).Infof("[TASK] App upgrade completed successfully for task: %s, app: %s", task.ID, task.AppName) case CloneApp: - // Execute app clone glog.V(2).Infof("[TASK] Executing app clone for task: %s", task.ID) result, err = tm.AppClone(task) - task.Result = result if err != nil { - glog.Errorf("[TASK] App clone failed for task: %s, app: %s, error: %v", task.ID, task.AppName, err) - task.Status = Failed - task.ErrorMsg = fmt.Sprintf("Clone failed: %v", err) - now := time.Now() - task.CompletedAt = &now - - // Call callback if exists (for synchronous requests) - if task.Callback != nil { - glog.Errorf("[TASK] Calling callback for failed task: %s", task.ID) - task.Callback(result, err) - } - - // Remove failed task from running tasks - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed failed task from running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - - tm.finalizeTaskPersistence(task) - - // Send task finished system update - tm.sendTaskFinishedUpdate(task, 
"failed") - - tm.recordTaskResult(task, result, err) + tm.handleTaskFailure(task, result, err, "Clone failed") return } glog.V(2).Infof("[TASK] App clone completed successfully for task: %s, app: %s", task.ID, task.AppName) } - // Task completed successfully task.Result = result task.Status = Completed now := time.Now() @@ -775,34 +570,10 @@ func (tm *TaskModule) executeTask(task *Task) { glog.V(2).Infof("[TASK] Task completed successfully: ID=%s, Type=%s, AppName=%s, User=%s, Duration=%v", task.ID, getTaskTypeString(task.Type), task.AppName, task.User, now.Sub(*task.StartedAt)) - // Log the result summary - glog.V(2).Infof("[TASK] Task result summary: ID=%s, Result length=%d bytes", task.ID, len(result)) - - if tm.mu.TryLock() { - delete(tm.runningTasks, task.ID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed completed task from running tasks: ID=%s", task.ID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock to remove task from running tasks: ID=%s, will retry later", task.ID) - // Retry in a goroutine with TryLock - go func(taskID string) { - time.Sleep(100 * time.Millisecond) - if tm.mu.TryLock() { - delete(tm.runningTasks, taskID) - tm.mu.Unlock() - glog.V(2).Infof("[TASK] Removed completed task from running tasks (retry): ID=%s", taskID) - } else { - glog.Warningf("[TASK][TryLock] Failed to acquire lock on retry for task: ID=%s, task will be cleaned up later", taskID) - } - }(task.ID) - } - + tm.removeRunningTask(task.ID) tm.finalizeTaskPersistence(task) - - // Send task finished system update tm.sendTaskFinishedUpdate(task, "succeed") - // Call callback if exists (for synchronous requests) if task.Callback != nil { glog.V(3).Infof("[TASK] Calling callback for successful task: %s", task.ID) task.Callback(result, nil) @@ -902,6 +673,8 @@ func (tm *TaskModule) statusChecker() { // checkRunningTasksStatus checks the status of all running tasks func (tm *TaskModule) checkRunningTasksStatus() { + tm.mu.RLock() + defer tm.mu.RUnlock() for 
taskID, task := range tm.runningTasks { glog.V(2).Infof("[TASK] Checking status for task: ID=%s", taskID) @@ -1114,23 +887,17 @@ func (tm *TaskModule) GetInstanceID() string { } // HasPendingOrRunningInstallTask checks if there are any pending or running install/clone tasks for the given app and user -// Returns (hasTask, lockAcquired) where hasTask indicates if there are such tasks, and lockAcquired indicates if the lock was successfully acquired -// If lockAcquired is false, the result is unreliable and the caller should handle accordingly (e.g., delay processing) +// Returns (hasTask, lockAcquired) - lockAcquired is always true with blocking lock, kept for API compatibility func (tm *TaskModule) HasPendingOrRunningInstallTask(appName, user string) (hasTask bool, lockAcquired bool) { - if !tm.mu.TryRLock() { - glog.Warningf("[TryLock] failed to acquire lock for HasPendingOrRunningInstallTask, user: %s, app: %s", user, appName) - return false, false - } + tm.mu.RLock() defer tm.mu.RUnlock() - // Check running tasks for _, t := range tm.runningTasks { if t.AppName == appName && t.User == user && (t.Type == InstallApp || t.Type == CloneApp) { return true, true } } - // Check pending tasks for _, t := range tm.pendingTasks { if t.AppName == appName && t.User == user && (t.Type == InstallApp || t.Type == CloneApp) { return true, true @@ -1142,401 +909,130 @@ func (tm *TaskModule) HasPendingOrRunningInstallTask(appName, user string) (hasT // InstallTaskSucceed marks an install or clone task as completed successfully by opID or appName+user func (tm *TaskModule) InstallTaskSucceed(opID, appName, user string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for InstallTaskSucceed, user: %s, opId: %s, app: %s", user, opID, appName) - return fmt.Errorf("failed to acquire lock for InstallTaskSucceed") - } - defer tm.mu.Unlock() - - // First try to find the install or clone task with matching opID in running tasks - var targetTask *Task - for 
_, task := range tm.runningTasks { - if task.OpID == opID && (task.Type == InstallApp || task.Type == CloneApp) { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if (task.Type == InstallApp || task.Type == CloneApp) && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] InstallTaskSucceed - No running install or clone task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running install or clone task found with opID: %s or appName: %s, user: %s", opID, appName, user) + resultMsg := "Installation completed successfully via external signal" + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{InstallApp, CloneApp}, Completed, resultMsg, "") + if err != nil { + glog.Warningf("[%s] InstallTaskSucceed - %v", tm.instanceID, err) + return err } - - // Mark task as completed - targetTask.Status = Completed - now := time.Now() - targetTask.CompletedAt = &now - - // Set appropriate result message based on task type - if targetTask.Type == CloneApp { - targetTask.Result = "Clone completed successfully via external signal" - } else { - targetTask.Result = "Installation completed successfully via external signal" + if task.Type == CloneApp { + task.Result = "Clone completed successfully via external signal" } - taskTypeStr := getTaskTypeString(targetTask.Type) - glog.V(2).Infof("[%s] InstallTaskSucceed - Task marked as completed: ID=%s, Type=%s, OpID=%s, AppName=%s, User=%s, Duration=%v", - tm.instanceID, targetTask.ID, taskTypeStr, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt)) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] InstallTaskSucceed - Removed completed task from 
running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task completion in history - resultMsg := targetTask.Result - tm.recordTaskResult(targetTask, resultMsg, nil) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "succeed") + glog.V(2).Infof("[%s] InstallTaskSucceed - Task completed: ID=%s, Type=%s, OpID=%s, App=%s, User=%s", + tm.instanceID, task.ID, getTaskTypeString(task.Type), task.OpID, task.AppName, task.User) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, nil) + tm.sendTaskFinishedUpdate(task, "succeed") return nil } // InstallTaskFailed marks an install task as failed by opID or appName+user func (tm *TaskModule) InstallTaskFailed(opID, appName, user, errorMsg string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for InstallTaskFailed, user: %s, opId: %s, app: %s, error: %s", user, opID, appName, errorMsg) - return fmt.Errorf("failed to acquire lock for InstallTaskFailed") - } - defer tm.mu.Unlock() - - // First try to find the install task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == InstallApp { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == InstallApp && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] InstallTaskFailed - No running install task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running install task found with opID: %s or appName: %s, user: %s", opID, appName, user) + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{InstallApp}, Failed, "Installation failed via external 
signal", errorMsg) + if err != nil { + glog.Warningf("[%s] InstallTaskFailed - %v", tm.instanceID, err) + return err } - // Mark task as failed - targetTask.Status = Failed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.ErrorMsg = errorMsg - targetTask.Result = "Installation failed via external signal" - - glog.V(2).Infof("[%s] InstallTaskFailed - Task marked as failed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v, Error: %s", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt), errorMsg) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(3).Infof("[%s] InstallTaskFailed - Removed failed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task failure in history - tm.recordTaskResult(targetTask, "Installation failed via external signal", fmt.Errorf(errorMsg)) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "failed") + glog.V(2).Infof("[%s] InstallTaskFailed - Task failed: ID=%s, OpID=%s, App=%s, User=%s, Error: %s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User, errorMsg) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, fmt.Errorf(errorMsg)) + tm.sendTaskFinishedUpdate(task, "failed") return nil } // InstallTaskCanceled marks an install task as canceled by app name and user func (tm *TaskModule) InstallTaskCanceled(appName, appVersion, source, user string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for InstallTaskCanceled, user: %s, source: %s, app: %s, version: %s", user, source, appName, appVersion) - return fmt.Errorf("failed to acquire lock for InstallTaskCanceled") - } - defer tm.mu.Unlock() - - // Find the install task with matching criteria in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.Type == InstallApp && 
task.AppName == appName && task.User == user { - targetTask = task - break - } - } - - if targetTask == nil { - glog.V(2).Infof("[%s] InstallTaskCanceled - No running install task found with appName: %s, user: %s", - tm.instanceID, appName, user) - return fmt.Errorf("no running install task found with appName: %s, user: %s", appName, user) + task, err := tm.completeRunningTask("", appName, user, + []TaskType{InstallApp}, Canceled, "Installation canceled via external signal", "Installation canceled via external signal") + if err != nil { + glog.V(2).Infof("[%s] InstallTaskCanceled - %v", tm.instanceID, err) + return err } - // Mark task as canceled - targetTask.Status = Canceled - now := time.Now() - targetTask.CompletedAt = &now - targetTask.ErrorMsg = "Installation canceled via external signal" - targetTask.Result = "Installation canceled via external signal" - - glog.V(2).Infof("[%s] InstallTaskCanceled - Task marked as canceled: ID=%s, AppName=%s, User=%s, Duration=%v", - tm.instanceID, targetTask.ID, appName, user, now.Sub(*targetTask.StartedAt)) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(3).Infof("[%s] InstallTaskCanceled - Removed canceled task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task cancellation in history - tm.recordTaskResult(targetTask, "Installation canceled via external signal", fmt.Errorf("installation canceled")) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "canceled") + glog.V(2).Infof("[%s] InstallTaskCanceled - Task canceled: ID=%s, App=%s, User=%s", + tm.instanceID, task.ID, task.AppName, task.User) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, fmt.Errorf("installation canceled")) + tm.sendTaskFinishedUpdate(task, "canceled") return nil } // CancelInstallTaskSucceed marks a cancel install task as completed successfully by opID or appName+user func (tm 
*TaskModule) CancelInstallTaskSucceed(opID, appName, user string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for CancelInstallTaskSucceed, user: %s, opId: %s, app: %s", user, opID, appName) - return fmt.Errorf("failed to acquire lock for CancelInstallTaskSucceed") - } - defer tm.mu.Unlock() - - // First try to find the cancel install task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == CancelAppInstall { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == CancelAppInstall && task.AppName == appName && task.User == user { - targetTask = task - break - } - } + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{CancelAppInstall}, Completed, "Cancel installation completed successfully via external signal", "") + if err != nil { + glog.Warningf("[%s] CancelInstallTaskSucceed - %v", tm.instanceID, err) + return err } - if targetTask == nil { - glog.Warningf("[%s] CancelInstallTaskSucceed - No running cancel install task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running cancel install task found with opID: %s or appName: %s, user: %s", opID, appName, user) - } - - // Mark task as completed - targetTask.Status = Completed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.Result = "Cancel installation completed successfully via external signal" - - glog.V(2).Infof("[%s] CancelInstallTaskSucceed - Task marked as completed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt)) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] 
CancelInstallTaskSucceed - Removed completed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task completion in history - tm.recordTaskResult(targetTask, "Cancel installation completed successfully via external signal", nil) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "succeed") + glog.V(2).Infof("[%s] CancelInstallTaskSucceed - Task completed: ID=%s, OpID=%s, App=%s, User=%s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, nil) + tm.sendTaskFinishedUpdate(task, "succeed") return nil } // CancelInstallTaskFailed marks a cancel install task as failed by opID or appName+user func (tm *TaskModule) CancelInstallTaskFailed(opID, appName, user, errorMsg string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for CancelInstallTaskFailed, user: %s, opId: %s, name: %s, error: %s", user, opID, appName, errorMsg) - return fmt.Errorf("failed to acquire lock for CancelInstallTaskFailed") - } - defer tm.mu.Unlock() - - // First try to find the cancel install task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == CancelAppInstall { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == CancelAppInstall && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] CancelInstallTaskFailed - No running cancel install task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running cancel install task found with opID: %s or appName: %s, user: %s", opID, appName, user) + task, err := 
tm.completeRunningTask(opID, appName, user, + []TaskType{CancelAppInstall}, Failed, "Cancel installation failed via external signal", errorMsg) + if err != nil { + glog.Warningf("[%s] CancelInstallTaskFailed - %v", tm.instanceID, err) + return err } - // Mark task as failed - targetTask.Status = Failed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.ErrorMsg = errorMsg - targetTask.Result = "Cancel installation failed via external signal" - - glog.V(2).Infof("[%s] CancelInstallTaskFailed - Task marked as failed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v, Error: %s", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt), errorMsg) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] CancelInstallTaskFailed - Removed failed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task failure in history - tm.recordTaskResult(targetTask, "Cancel installation failed via external signal", fmt.Errorf(errorMsg)) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "failed") + glog.V(2).Infof("[%s] CancelInstallTaskFailed - Task failed: ID=%s, OpID=%s, App=%s, User=%s, Error: %s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User, errorMsg) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, fmt.Errorf(errorMsg)) + tm.sendTaskFinishedUpdate(task, "failed") return nil } // UninstallTaskSucceed marks an uninstall task as completed successfully by opID or appName+user func (tm *TaskModule) UninstallTaskSucceed(opID, appName, user string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for UninstallTaskSucceed, user: %s ,opId: %s, app: %s", user, opID, appName) - return fmt.Errorf("failed to acquire lock for UninstallTaskSucceed") - } - defer tm.mu.Unlock() - - // First try to find the 
uninstall task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == UninstallApp { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == UninstallApp && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] UninstallTaskSucceed - No running uninstall task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running uninstall task found with opID: %s or appName: %s, user: %s", opID, appName, user) + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{UninstallApp}, Completed, "Uninstallation completed successfully via external signal", "") + if err != nil { + glog.Warningf("[%s] UninstallTaskSucceed - %v", tm.instanceID, err) + return err } - // Mark task as completed - targetTask.Status = Completed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.Result = "Uninstallation completed successfully via external signal" - - glog.V(2).Infof("[%s] UninstallTaskSucceed - Task marked as completed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt)) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] UninstallTaskSucceed - Removed completed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task completion in history - tm.recordTaskResult(targetTask, "Uninstallation completed successfully via external signal", nil) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "succeed") + glog.V(2).Infof("[%s] 
UninstallTaskSucceed - Task completed: ID=%s, OpID=%s, App=%s, User=%s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, nil) + tm.sendTaskFinishedUpdate(task, "succeed") return nil } // UninstallTaskFailed marks an uninstall task as failed by opID or appName+user func (tm *TaskModule) UninstallTaskFailed(opID, appName, user, errorMsg string) error { - if !tm.mu.TryLock() { - glog.Warningf("[TryLock] failed to acquire lock for UninstallTaskFailed, user: %s, opId: %s, app: %s, error: %s", user, opID, appName, errorMsg) - return fmt.Errorf("failed to acquire lock for UninstallTaskFailed") - } - defer tm.mu.Unlock() - - // First try to find the uninstall task with matching opID in running tasks - var targetTask *Task - for _, task := range tm.runningTasks { - if task.OpID == opID && task.Type == UninstallApp { - targetTask = task - break - } - } - - // If opID match failed, try to find by appName and user - if targetTask == nil && appName != "" && user != "" { - for _, task := range tm.runningTasks { - if task.Type == UninstallApp && task.AppName == appName && task.User == user { - targetTask = task - break - } - } - } - - if targetTask == nil { - glog.Warningf("[%s] UninstallTaskFailed - No running uninstall task found with opID: %s or appName: %s, user: %s", - tm.instanceID, opID, appName, user) - return fmt.Errorf("no running uninstall task found with opID: %s or appName: %s, user: %s", opID, appName, user) + task, err := tm.completeRunningTask(opID, appName, user, + []TaskType{UninstallApp}, Failed, "Uninstallation failed via external signal", errorMsg) + if err != nil { + glog.Warningf("[%s] UninstallTaskFailed - %v", tm.instanceID, err) + return err } - // Mark task as failed - targetTask.Status = Failed - now := time.Now() - targetTask.CompletedAt = &now - targetTask.ErrorMsg = errorMsg - targetTask.Result = "Uninstallation failed via external signal" - - 
glog.V(2).Infof("[%s] UninstallTaskFailed - Task marked as failed: ID=%s, OpID=%s, AppName=%s, User=%s, Duration=%v, Error: %s", - tm.instanceID, targetTask.ID, targetTask.OpID, targetTask.AppName, targetTask.User, now.Sub(*targetTask.StartedAt), errorMsg) - - // Remove task from running tasks - delete(tm.runningTasks, targetTask.ID) - glog.V(2).Infof("[%s] UninstallTaskFailed - Removed failed task from running tasks: ID=%s", tm.instanceID, targetTask.ID) - - tm.finalizeTaskPersistence(targetTask) - - // Record task failure in history - tm.recordTaskResult(targetTask, "Uninstallation failed via external signal", fmt.Errorf(errorMsg)) - - // Send task finished system update - tm.sendTaskFinishedUpdate(targetTask, "failed") + glog.V(2).Infof("[%s] UninstallTaskFailed - Task failed: ID=%s, OpID=%s, App=%s, User=%s, Error: %s", + tm.instanceID, task.ID, task.OpID, task.AppName, task.User, errorMsg) + tm.finalizeTaskPersistence(task) + tm.recordTaskResult(task, task.Result, fmt.Errorf(errorMsg)) + tm.sendTaskFinishedUpdate(task, "failed") return nil } diff --git a/internal/v2/types/cache_manager.go b/internal/v2/types/cache_manager.go index 1683e30..eb2b1ba 100644 --- a/internal/v2/types/cache_manager.go +++ b/internal/v2/types/cache_manager.go @@ -3,26 +3,20 @@ package types // CacheManagerInterface defines the interface for cache management operations // This interface is used to avoid circular imports between packages type CacheManagerInterface interface { - // Lock acquires the cache manager's write lock - Lock() + // Hierarchical read accessors + GetAllUsersData() map[string]*UserData + GetUserData(userID string) *UserData + GetSourceData(userID, sourceID string) *SourceData + GetUserIDs() []string - // Unlock releases the cache manager's write lock - Unlock() + // Specific read queries + HasSourceData(sourceID string) bool + IsAppInstalled(sourceID, appName string) bool + GetSourceOthersHash(sourceID string) string + FindPendingDataForApp(userID, sourceID, appID 
string) *AppInfoLatestPendingData - // TryLock attempts to acquire the cache manager's write lock without blocking - // Returns true if lock acquired, false if would block - TryLock() bool - - // RLock acquires the cache manager's read lock - RLock() - - // RUnlock releases the cache manager's read lock - RUnlock() - - // TryRLock attempts to acquire the cache manager's read lock without blocking - // Returns true if lock acquired, false if would block - TryRLock() bool - - // GetCache returns the underlying cache data - GetCache() *CacheData + // Write operations + UpdateSourceOthers(sourceID string, others *Others) + RemoveAppFromAllSources(appName, sourceID string) int + CopyPendingVersionHistory(userID, sourceID, appID, appName string, latestData *AppInfoLatestData) error } diff --git a/internal/v2/utils/setup.go b/internal/v2/utils/setup.go index cf2e43e..c68a734 100644 --- a/internal/v2/utils/setup.go +++ b/internal/v2/utils/setup.go @@ -35,6 +35,16 @@ type AppServiceResponse struct { Url string `json:"url"` Invisible bool `json:"invisible"` } `json:"entrances"` + Settings struct { + ClusterScoped string `json:"clusterScoped"` + MobileSupported string `json:"mobileSupported"` + Policy string `json:"policy"` + RequiredGPU string `json:"requiredGPU"` + Source string `json:"source"` + Target string `json:"target"` + Title string `json:"title"` + Version string `json:"version"` + } `json:"settings"` } `json:"spec"` Status struct { State string `json:"state"` @@ -597,7 +607,7 @@ func createAppStateLatestData(app AppServiceResponse, isStartupProcess bool) (*t data := map[string]interface{}{ "name": app.Spec.Name, "rawAppName": app.Spec.RawAppName, - "title": app.Spec.Title, + "title": app.Spec.Settings.Title, "state": app.Status.State, "updateTime": app.Status.UpdateTime, "statusTime": app.Status.StatusTime, diff --git a/internal/v2/utils/state_monitor.go b/internal/v2/utils/state_monitor.go index 72016cc..2bd2c82 100644 --- a/internal/v2/utils/state_monitor.go +++ 
b/internal/v2/utils/state_monitor.go @@ -10,7 +10,7 @@ import ( // DataSenderInterface defines the interface for sending app info updates type DataSenderInterface interface { - SendAppInfoUpdate(update types.AppInfoUpdate) error + SendAppInfoUpdate(update types.AppInfoUpdate, trace string) error IsConnected() bool Close() } @@ -66,7 +66,7 @@ func (sm *StateMonitor) NotifyStateChange( Source: sourceID, } - return sm.dataSender.SendAppInfoUpdate(update) + return sm.dataSender.SendAppInfoUpdate(update, "state_monitor") } // HasStateChanged checks if the app state has changed compared to existing state diff --git a/pkg/v2/api/app.go b/pkg/v2/api/app.go index 99e8b30..00dc3b8 100644 --- a/pkg/v2/api/app.go +++ b/pkg/v2/api/app.go @@ -97,6 +97,7 @@ type FilteredSourceData struct { // FilteredSourceDataForState represents filtered source data for state endpoint (only AppStateLatest) type FilteredSourceDataForState struct { Type types.SourceDataType `json:"type"` + AppInfoLatest []*types.AppInfoLatestData `json:"app_info_latest,omitempty"` AppStateLatest []*types.AppStateLatestData `json:"app_state_latest"` } @@ -277,7 +278,7 @@ func (s *Server) getAppsInfo(w http.ResponseWriter, r *http.Request) { }() // Get user data from cache - userData := s.cacheManager.GetUserDataNoLock(userID) + userData := s.cacheManager.GetUserData(userID) if userData == nil { glog.V(3).Infof("User data not found for user: %s", userID) resultChan <- result{err: fmt.Errorf("user data not found")} @@ -629,20 +630,17 @@ func (s *Server) getMarketHash(w http.ResponseWriter, r *http.Request) { } }() - // Get user data from cache with fallback (non-blocking) - userData := s.cacheManager.GetUserDataNoLock(userID) + userData := s.cacheManager.GetUserData(userID) if userData == nil { glog.Warningf("User data not found for user: %s, attempting to resync user data", userID) - // Try to resync user data to fix missing user information if err := s.cacheManager.ResynceUser(); err != nil { glog.Errorf("Failed to 
resync user data for user %s: %v", userID, err) resultChan <- result{err: fmt.Errorf("failed to resync user data: %v", err)} return } - // Try to get user data again after resync - userData = s.cacheManager.GetUserDataNoLock(userID) + userData = s.cacheManager.GetUserData(userID) if userData == nil { glog.Warningf("User data still not found for user: %s after resync", userID) resultChan <- result{err: fmt.Errorf("user data not found even after resync")} @@ -664,9 +662,6 @@ func (s *Server) getMarketHash(w http.ResponseWriter, r *http.Request) { case <-ctx.Done(): glog.V(3).Infof("Request timeout or cancelled for /api/v2/market/hash") // On timeout, dump lock info to find who holds the lock - if s.cacheManager != nil { - s.cacheManager.DumpLockInfo("getMarketHash timeout") - } s.sendResponse(w, http.StatusRequestTimeout, false, "Request timeout - hash retrieval took too long", nil) return case res := <-resultChan: @@ -760,7 +755,113 @@ func (s *Server) getMarketState(w http.ResponseWriter, r *http.Request) { // Filter the user data to include only AppStateLatest fields with timeout filterStart := time.Now() - filteredUserData := s.filterUserDataForStateWithTimeout(ctx, userData) + filteredUserData := s.filterUserDataForStateWithTimeout(ctx, userData, false) + if filteredUserData == nil { + glog.V(3).Infof("Data filtering timed out or failed for user: %s", userID) + resultChan <- result{err: fmt.Errorf("data filtering timeout")} + return + } + glog.V(3).Infof("Data filtering took %v for user: %s", time.Since(filterStart), userID) + + // Prepare response data + responseData := MarketStateResponse{ + UserData: filteredUserData, + UserID: userID, + Timestamp: time.Now().Unix(), + } + + resultChan <- result{data: responseData} + }() + + // Wait for result or timeout + select { + case <-ctx.Done(): + glog.V(3).Infof("Request timeout or cancelled for /api/v2/market/state") + s.sendResponse(w, http.StatusRequestTimeout, false, "Request timeout - data retrieval took too 
long", nil) + return + case res := <-resultChan: + if res.err != nil { + glog.Errorf("Error retrieving market state: %v", res.err) + if res.err.Error() == "user data not found" { + s.sendResponse(w, http.StatusNotFound, false, "User data not found", nil) + } else { + s.sendResponse(w, http.StatusInternalServerError, false, "Failed to retrieve market state", nil) + } + return + } + + glog.V(2).Infof("Market state retrieved successfully for user: %s", userID) + s.sendResponse(w, http.StatusOK, true, "Market state retrieved successfully", res.data) + } +} + +// Get simplified market state information (AppStateLatest data plus AppSimpleInfo extracted from AppInfoLatest) +func (s *Server) getMarketStateSimple(w http.ResponseWriter, r *http.Request) { + requestStart := time.Now() + glog.V(2).Infof("GET /api/v2/market/statesimple - Getting market state simple, request start: %v", requestStart) + + // Add timeout context + ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) + defer cancel() + + // Check if cache manager is available + if s.cacheManager == nil { + glog.V(3).Info("Cache manager is not initialized") + s.sendResponse(w, http.StatusInternalServerError, false, "Cache manager not available", nil) + return + } + + // Convert http.Request to restful.Request to reuse utils functions + restfulReq := s.httpToRestfulRequest(r) + + // Get user information from request using utils module + authStart := time.Now() + userID, err := utils.GetUserInfoFromRequest(restfulReq) + if err != nil { + glog.Errorf("Failed to get user from request: %v", err) + s.sendResponse(w, http.StatusUnauthorized, false, "Failed to get user information", nil) + return + } + glog.V(3).Infof("User authentication took %v, retrieved user ID: %s", time.Since(authStart), userID) + + // Create a channel to receive the result + type result struct { + data MarketStateResponse + err error + } + resultChan := make(chan result, 1) + + // Run the data retrieval in a goroutine + go func() { + defer func() { + if r := recover(); r != nil { + 
glog.Errorf("Panic in getMarketStateSimple: %v", r) + resultChan <- result{err: fmt.Errorf("internal error occurred")} + } + }() + + // Get user data from cache with timeout check + start := time.Now() + userData := s.cacheManager.GetUserData(userID) + if userData == nil { + glog.V(3).Infof("User data not found for user: %s", userID) + resultChan <- result{err: fmt.Errorf("user data not found")} + return + } + glog.V(3).Infof("GetUserData took %v for user: %s", time.Since(start), userID) + + // Check if we're still within timeout before filtering + select { + case <-ctx.Done(): + glog.V(3).Infof("Context cancelled during user data retrieval for user: %s", userID) + resultChan <- result{err: fmt.Errorf("request cancelled")} + return + default: + } + + // Filter the user data to include only AppStateLatest fields with timeout + filterStart := time.Now() + filteredUserData := s.filterUserDataForStateWithTimeout(ctx, userData, true) if filteredUserData == nil { glog.V(3).Infof("Data filtering timed out or failed for user: %s", userID) resultChan <- result{err: fmt.Errorf("data filtering timeout")} @@ -847,7 +948,7 @@ func (s *Server) getMarketData(w http.ResponseWriter, r *http.Request) { // Get user data from cache with timeout check start := time.Now() - userData := s.cacheManager.GetUserDataNoLock(userID) + userData := s.cacheManager.GetUserData(userID) if userData == nil { glog.V(3).Infof("User data not found for user: %s", userID) resultChan <- result{err: fmt.Errorf("user data not found")} @@ -1049,7 +1150,7 @@ func (s *Server) convertSourceDataToFiltered(sourceData *types.SourceData) *Filt } // filterUserDataForStateWithTimeout filters user data to include only AppStateLatest fields with timeout -func (s *Server) filterUserDataForStateWithTimeout(ctx context.Context, userData *types.UserData) *FilteredUserDataForState { +func (s *Server) filterUserDataForStateWithTimeout(ctx context.Context, userData *types.UserData, withAppInfoLatest bool) *FilteredUserDataForState { 
if userData == nil { return nil } @@ -1081,7 +1182,7 @@ func (s *Server) filterUserDataForStateWithTimeout(ctx context.Context, userData } // Convert data directly without additional locks - filteredSourceData := s.convertSourceDataToFilteredForState(sourceData) + filteredSourceData := s.convertSourceDataToFilteredForState(sourceData, withAppInfoLatest) if filteredSourceData != nil { filteredUserData.Sources[sourceID] = filteredSourceData } @@ -1092,7 +1193,7 @@ func (s *Server) filterUserDataForStateWithTimeout(ctx context.Context, userData } // convertSourceDataToFilteredForState converts source data to filtered format for state endpoint (only AppStateLatest) -func (s *Server) convertSourceDataToFilteredForState(sourceData *types.SourceData) *FilteredSourceDataForState { +func (s *Server) convertSourceDataToFilteredForState(sourceData *types.SourceData, withAppInfoLatest bool) *FilteredSourceDataForState { if sourceData == nil { return nil } @@ -1104,6 +1205,17 @@ func (s *Server) convertSourceDataToFilteredForState(sourceData *types.SourceDat AppStateLatest: sourceData.AppStateLatest, } + if withAppInfoLatest { + var appInfoLatest []*types.AppInfoLatestData + for _, app := range sourceData.AppInfoLatest { + var info = &types.AppInfoLatestData{ + AppSimpleInfo: app.AppSimpleInfo, + } + appInfoLatest = append(appInfoLatest, info) + } + filteredSourceData.AppInfoLatest = appInfoLatest + } + return filteredSourceData } diff --git a/pkg/v2/api/server.go b/pkg/v2/api/server.go index 6a60b2e..f00f4ff 100644 --- a/pkg/v2/api/server.go +++ b/pkg/v2/api/server.go @@ -86,6 +86,9 @@ func (s *Server) setupRoutes() { api.HandleFunc("/market/state", s.getMarketState).Methods("GET") glog.V(3).Info("Route configured: GET /app-store/api/v2/market/state") + api.HandleFunc("/market/statesimple", s.getMarketStateSimple).Methods("GET") + glog.V(3).Info("Route configured: GET /app-store/api/v2/market/statesimple") + // 2. 
Get specific application information (supports multiple queries) api.HandleFunc("/apps", s.getAppsInfo).Methods("POST") glog.V(3).Info("Route configured: POST /app-store/api/v2/apps") diff --git a/pkg/v2/api/system.go b/pkg/v2/api/system.go index 332eb59..61eca87 100644 --- a/pkg/v2/api/system.go +++ b/pkg/v2/api/system.go @@ -407,27 +407,7 @@ func doGetWithBflUser(url, bflUser string) (interface{}, error) { } func doGetUsers(cm *appinfo.CacheManager) ([]map[string]string, error) { - if ok := cm.TryRLock(); !ok { - glog.Warning("[TryRLock] doGetUsers: CacheManager read lock not available") - return nil, nil - } - defer cm.RUnlock() - - var usersInfo []map[string]string - - getUsers := cm.GetCache().Users - for _, v := range getUsers { - if v.UserInfo != nil && v.UserInfo.Exists { - var ui = make(map[string]string) - ui["id"] = v.UserInfo.Id - ui["name"] = v.UserInfo.Name - ui["role"] = v.UserInfo.Role - ui["status"] = v.UserInfo.Status - usersInfo = append(usersInfo, ui) - } - } - - return usersInfo, nil + return cm.ListActiveUsers(), nil } func getenv(key string) string { diff --git a/pkg/v2/api/task.go b/pkg/v2/api/task.go index bd86323..91db4b8 100644 --- a/pkg/v2/api/task.go +++ b/pkg/v2/api/task.go @@ -216,11 +216,11 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { } if targetApp.AppInfo == nil { - glog.V(2).Infof("installApp: targetApp.AppInfo is nil for app=%s source=%s", request.AppName, request.Source) + glog.V(2).Infof("installApp: targetApp.AppInfo is nil for app=%s, source=%s", request.AppName, request.Source) } else if targetApp.AppInfo.Price == nil { - glog.V(2).Infof("installApp: targetApp.AppInfo.Price is nil for app=%s source=%s", request.AppName, request.Source) + glog.V(2).Infof("installApp: targetApp.AppInfo.Price is nil for app=%s, source=%s", request.AppName, request.Source) } else { - glog.V(2).Infof("installApp: targetApp.AppInfo.Price detected for app=%s source=%s", request.AppName, request.Source) + 
glog.V(2).Infof("installApp: targetApp.AppInfo.Price detected for app=%s, source=%s", request.AppName, request.Source) } // Step 8: Verify chart package exists @@ -254,7 +254,7 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { } productID, developerName := extractInstallProductMetadata(targetApp.AppInfo) - glog.V(2).Infof("installApp: extracted product metadata app=%s source=%s productID=%s developer=%s", request.AppName, request.Source, productID, developerName) + glog.V(2).Infof("installApp: extracted product metadata app=%s, source=%s, productID=%s, developer=%s", request.AppName, request.Source, productID, developerName) realAppID := request.AppName if targetApp.AppInfo != nil && targetApp.AppInfo.AppEntry != nil && targetApp.AppInfo.AppEntry.ID != "" { @@ -262,7 +262,7 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { } else if targetApp.RawData != nil && targetApp.RawData.AppID != "" { realAppID = targetApp.RawData.AppID } - glog.V(2).Infof("installApp: resolved realAppID=%s for app=%s source=%s", realAppID, request.AppName, request.Source) + glog.V(2).Infof("installApp: resolved realAppID=%s for app=%s, source=%s, sync=%v", realAppID, request.AppName, request.Source, request.Sync) // Step 10: Create installation task taskMetadata := map[string]interface{}{ @@ -278,15 +278,15 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { } if productID != "" { taskMetadata["productID"] = productID - glog.V(2).Infof("installApp: added productID=%s to metadata for app=%s source=%s", productID, request.AppName, request.Source) + glog.V(2).Infof("installApp: added productID=%s to metadata for app=%s, source=%s", productID, request.AppName, request.Source) } if developerName != "" { taskMetadata["developerName"] = developerName - glog.V(2).Infof("installApp: added developerName=%s to metadata for app=%s source=%s", developerName, request.AppName, request.Source) + glog.V(2).Infof("installApp: added 
developerName=%s to metadata for app=%s, source=%s", developerName, request.AppName, request.Source) } if realAppID != "" { taskMetadata["realAppID"] = realAppID - glog.V(2).Infof("installApp: added realAppID=%s to metadata for app=%s source=%s", realAppID, request.AppName, request.Source) + glog.V(2).Infof("installApp: added realAppID=%s to metadata for app=%s, source=%s", realAppID, request.AppName, request.Source) } // Handle synchronous requests with proper blocking @@ -311,7 +311,7 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { return } - glog.V(2).Infof("Created synchronous installation task: ID=%s for app: %s version: %s", task.ID, request.AppName, request.Version) + glog.V(2).Infof("Created synchronous installation task: ID=%s for app: %s, version: %s", task.ID, request.AppName, request.Version) // Wait for task completion <-done @@ -356,7 +356,7 @@ func (s *Server) installApp(w http.ResponseWriter, r *http.Request) { return } - glog.V(2).Infof("Created asynchronous installation task: ID=%s for app: %s version: %s", task.ID, request.AppName, request.Version) + glog.V(2).Infof("Created asynchronous installation task: ID=%s for app: %s, version: %s", task.ID, request.AppName, request.Version) // Return immediately for asynchronous requests s.sendResponse(w, http.StatusOK, true, "App installation started successfully", map[string]interface{}{