diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index d82d3a1b9..c5e12b7ee 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -40,7 +40,7 @@ protected function schedule(Schedule $schedule): void } // $this->scheduleInstance->job(new CleanupStaleMultiplexedConnections)->hourly(); - $this->scheduleInstance->command('cleanup:redis')->weekly(); + $this->scheduleInstance->command('cleanup:redis --clear-locks')->daily(); if (isDev()) { // Instance Jobs diff --git a/app/Jobs/ScheduledJobManager.php b/app/Jobs/ScheduledJobManager.php index d69585788..de782b96d 100644 --- a/app/Jobs/ScheduledJobManager.php +++ b/app/Jobs/ScheduledJobManager.php @@ -15,7 +15,9 @@ use Illuminate\Queue\SerializesModels; use Illuminate\Support\Carbon; use Illuminate\Support\Collection; +use Illuminate\Support\Facades\Cache; use Illuminate\Support\Facades\Log; +use Illuminate\Support\Facades\Redis; class ScheduledJobManager implements ShouldQueue { @@ -54,6 +56,11 @@ private function determineQueue(): string */ public function middleware(): array { + // Self-healing: clear any stale lock before WithoutOverlapping tries to acquire it. + // Stale locks (TTL = -1) can occur during upgrades, Redis restarts, or edge cases. + // @see https://github.com/coollabsio/coolify/issues/8327 + self::clearStaleLockIfPresent(); + return [ (new WithoutOverlapping('scheduled-job-manager')) ->expireAfter(90) // Lock expires after 90s to handle high-load environments with many tasks @@ -61,6 +68,34 @@ public function middleware(): array ]; } + /** + * Clear a stale WithoutOverlapping lock if it has no TTL (TTL = -1). + * + * This provides continuous self-healing since it runs every time the job is dispatched. + * Stale locks permanently block all scheduled job executions with no user-visible error. + */ + private static function clearStaleLockIfPresent(): void + { + try { + $cachePrefix = config('cache.prefix', ''); + $lockKey = $cachePrefix.'laravel-queue-overlap:'.self::class.':scheduled-job-manager'; + + $ttl = Redis::connection('default')->ttl($lockKey); + + if ($ttl === -1) { + Redis::connection('default')->del($lockKey); + Log::channel('scheduled')->warning('Cleared stale ScheduledJobManager lock', [ + 'lock_key' => $lockKey, + ]); + } + } catch (\Throwable $e) { + // Never let lock cleanup failure prevent the job from running + Log::channel('scheduled-errors')->error('Failed to check/clear stale lock', [ + 'error' => $e->getMessage(), + ]); + } + } + public function handle(): void { // Freeze the execution time at the start of the job @@ -108,6 +143,13 @@ public function handle(): void 'dispatched' => $this->dispatchedCount, 'skipped' => $this->skippedCount, ]); + + // Write heartbeat so the UI can detect when the scheduler has stopped + try { + Cache::put('scheduled-job-manager:heartbeat', now()->toIso8601String(), 300); + } catch (\Throwable) { + // Non-critical; don't let heartbeat failure affect the job + } } private function processScheduledBackups(): void diff --git a/app/Livewire/Server/DockerCleanup.php b/app/Livewire/Server/DockerCleanup.php index 92094c950..12d111d21 100644 --- a/app/Livewire/Server/DockerCleanup.php +++ b/app/Livewire/Server/DockerCleanup.php @@ -3,8 +3,13 @@ namespace App\Livewire\Server; use App\Jobs\DockerCleanupJob; +use App\Models\DockerCleanupExecution; use App\Models\Server; +use Cron\CronExpression; use Illuminate\Foundation\Auth\Access\AuthorizesRequests; +use Illuminate\Support\Carbon; +use Illuminate\Support\Facades\Cache; +use Livewire\Attributes\Computed; use Livewire\Attributes\Validate; use Livewire\Component; @@ -34,6 +39,53 @@ class DockerCleanup extends Component #[Validate('boolean')] public bool $disableApplicationImageRetention = false; + #[Computed] + public function isCleanupStale(): bool + { + try { + $lastExecution = DockerCleanupExecution::where('server_id', $this->server->id) + ->orderBy('created_at', 'desc') + ->first(); + + if (! $lastExecution) { + return false; + } + + $frequency = $this->server->settings->docker_cleanup_frequency ?? '0 0 * * *'; + if (isset(VALID_CRON_STRINGS[$frequency])) { + $frequency = VALID_CRON_STRINGS[$frequency]; + } + + $cron = new CronExpression($frequency); + $now = Carbon::now(); + $nextRun = Carbon::parse($cron->getNextRunDate($now)); + $afterThat = Carbon::parse($cron->getNextRunDate($nextRun)); + $intervalMinutes = $nextRun->diffInMinutes($afterThat); + + $threshold = max($intervalMinutes * 2, 10); + + return Carbon::parse($lastExecution->created_at)->diffInMinutes($now) > $threshold; + } catch (\Throwable) { + return false; + } + } + + #[Computed] + public function lastExecutionTime(): ?string + { + return DockerCleanupExecution::where('server_id', $this->server->id) + ->orderBy('created_at', 'desc') + ->first() + ?->created_at + ?->diffForHumans(); + } + + #[Computed] + public function isSchedulerHealthy(): bool + { + return Cache::get('scheduled-job-manager:heartbeat') !== null; + } + public function mount(string $server_uuid) { try { diff --git a/resources/views/livewire/server/docker-cleanup.blade.php b/resources/views/livewire/server/docker-cleanup.blade.php index 251137fa7..2fd8fc2ab 100644 --- a/resources/views/livewire/server/docker-cleanup.blade.php +++ b/resources/views/livewire/server/docker-cleanup.blade.php @@ -27,6 +27,22 @@
Configure Docker cleanup settings for your server.
+ @if ($this->isCleanupStale) +
+ +

The last Docker cleanup ran {{ $this->lastExecutionTime ?? 'unknown time' }} ago, + which is longer than expected for the configured frequency.

+ @if (!$this->isSchedulerHealthy) +

The scheduled job manager appears to be inactive. This may indicate + a stale Redis lock is blocking all scheduled jobs.

+ @endif +

To resolve, run on your Coolify instance: + php artisan cleanup:redis --clear-locks +

+
+
+ @endif +

Cleanup Configuration

diff --git a/tests/Feature/ScheduledJobManagerStaleLockTest.php b/tests/Feature/ScheduledJobManagerStaleLockTest.php new file mode 100644 index 000000000..e297c07bd --- /dev/null +++ b/tests/Feature/ScheduledJobManagerStaleLockTest.php @@ -0,0 +1,49 @@ +set($lockKey, 'stale-owner'); + + expect($redis->ttl($lockKey))->toBe(-1); + + $job = new ScheduledJobManager; + $job->middleware(); + + expect($redis->exists($lockKey))->toBe(0); +}); + +it('preserves valid lock with positive TTL', function () { + $cachePrefix = config('cache.prefix'); + $lockKey = $cachePrefix.'laravel-queue-overlap:'.ScheduledJobManager::class.':scheduled-job-manager'; + + $redis = Redis::connection('default'); + $redis->set($lockKey, 'active-owner'); + $redis->expire($lockKey, 60); + + expect($redis->ttl($lockKey))->toBeGreaterThan(0); + + $job = new ScheduledJobManager; + $job->middleware(); + + expect($redis->exists($lockKey))->toBe(1); + + $redis->del($lockKey); +}); + +it('does not fail when no lock exists', function () { + $cachePrefix = config('cache.prefix'); + $lockKey = $cachePrefix.'laravel-queue-overlap:'.ScheduledJobManager::class.':scheduled-job-manager'; + + Redis::connection('default')->del($lockKey); + + $job = new ScheduledJobManager; + $middleware = $job->middleware(); + + expect($middleware)->toBeArray()->toHaveCount(1); +}); diff --git a/tests/Unit/ScheduledJobManagerLockTest.php b/tests/Unit/ScheduledJobManagerLockTest.php index 3f3ae593a..577730f47 100644 --- a/tests/Unit/ScheduledJobManagerLockTest.php +++ b/tests/Unit/ScheduledJobManagerLockTest.php @@ -24,7 +24,7 @@ $expiresAfterProperty->setAccessible(true); $expiresAfter = $expiresAfterProperty->getValue($overlappingMiddleware); - expect($expiresAfter)->toBe(60) + expect($expiresAfter)->toBe(90) ->and($expiresAfter)->toBeGreaterThan(0, 'expireAfter must be set to prevent stale locks'); // Check releaseAfter is NOT set (we use dontRelease)