diff --git a/app/Console/Commands/CleanupRedis.php b/app/Console/Commands/CleanupRedis.php index f6a2de75b..a5fdc33e0 100644 --- a/app/Console/Commands/CleanupRedis.php +++ b/app/Console/Commands/CleanupRedis.php @@ -7,7 +7,7 @@ class CleanupRedis extends Command { - protected $signature = 'cleanup:redis {--dry-run : Show what would be deleted without actually deleting} {--skip-overlapping : Skip overlapping queue cleanup} {--clear-locks : Clear stale WithoutOverlapping locks}'; + protected $signature = 'cleanup:redis {--dry-run : Show what would be deleted without actually deleting} {--skip-overlapping : Skip overlapping queue cleanup} {--clear-locks : Clear stale WithoutOverlapping locks} {--restart : Aggressive cleanup mode for system restart (marks all processing jobs as failed)}'; protected $description = 'Cleanup Redis (Horizon jobs, metrics, overlapping queues, cache locks, and related data)'; @@ -63,6 +63,14 @@ public function handle() $deletedCount += $locksCleaned; } + // Clean up stuck jobs (restart mode = aggressive, runtime mode = conservative) + $isRestart = $this->option('restart'); + if ($isRestart || $this->option('clear-locks')) { + $this->info($isRestart ? 'Cleaning up stuck jobs (RESTART MODE - aggressive)...' : 'Checking for stuck jobs (runtime mode - conservative)...'); + $jobsCleaned = $this->cleanupStuckJobs($redis, $prefix, $dryRun, $isRestart); + $deletedCount += $jobsCleaned; + } + if ($dryRun) { $this->info("DRY RUN: Would delete {$deletedCount} out of {$totalKeys} keys"); } else { @@ -332,4 +340,98 @@ private function cleanupCacheLocks(bool $dryRun): int return $cleanedCount; } + + /** + * Clean up stuck jobs based on mode (restart vs runtime). 
+ * + * @param mixed $redis Redis connection + * @param string $prefix Horizon prefix + * @param bool $dryRun Dry run mode + * @param bool $isRestart Restart mode (aggressive) vs runtime mode (conservative) + * @return int Number of jobs cleaned + */ + private function cleanupStuckJobs($redis, string $prefix, bool $dryRun, bool $isRestart): int + { + $cleanedCount = 0; + $now = time(); + + // Get all keys with the horizon prefix + $keys = $redis->keys('*'); + + foreach ($keys as $key) { + $keyWithoutPrefix = str_replace($prefix, '', $key); + $type = $redis->command('type', [$keyWithoutPrefix]); + + // Only process hash-type keys (individual jobs) + if ($type !== 5) { + continue; + } + + $data = $redis->command('hgetall', [$keyWithoutPrefix]); + $status = data_get($data, 'status'); + $payload = data_get($data, 'payload'); + + // Only process jobs in "processing" or "reserved" state (strict comparison) + if (! in_array($status, ['processing', 'reserved'], true)) { + continue; + } + + // Parse job payload (cast guards a missing payload) to get job class; start time comes from the job hash + $payloadData = json_decode((string) $payload, true); + $jobClass = data_get($payloadData, 'displayName', 'Unknown'); + $pushedAt = (int) (data_get($data, 'pushed_at') ?? data_get($data, 'reserved_at') ?? 0); + + // Calculate how long the job has been processing (meaningless when $pushedAt is 0 = unknown) + $processingTime = $now - $pushedAt; + + $shouldFail = false; + $reason = ''; + + if ($isRestart) { + // RESTART MODE: Mark ALL processing/reserved jobs as failed + // Safe because all workers are dead on restart + $shouldFail = true; + $reason = 'System restart - all workers terminated'; + } else { + // RUNTIME MODE: Only mark truly stuck jobs as failed + // Be conservative to avoid killing legitimate long-running jobs + + // Skip ApplicationDeploymentJob entirely (has dynamic_timeout, can run 2+ hours) + if (str_contains($jobClass, 'ApplicationDeploymentJob')) { + continue; + } + + // Skip DatabaseBackupJob (large backups can take hours) + if (str_contains($jobClass, 'DatabaseBackupJob')) { + continue; + } + + // For other jobs, only fail if 
start time is known and processing > 12 hours + if ($pushedAt > 0 && $processingTime > 43200) { // 12 hours + $shouldFail = true; + $reason = 'Processing for more than 12 hours'; + } + } + + if ($shouldFail) { + if ($dryRun) { + $this->warn(" Would mark as FAILED: {$jobClass} (processing for ".round($processingTime / 60, 1)." min) - {$reason}"); + } else { + // Mark job as failed + $redis->command('hset', [$keyWithoutPrefix, 'status', 'failed']); + $redis->command('hset', [$keyWithoutPrefix, 'failed_at', $now]); + $redis->command('hset', [$keyWithoutPrefix, 'exception', "Job cleaned up by cleanup:redis - {$reason}"]); + + $this->info(" ✓ Marked as FAILED: {$jobClass} (processing for ".round($processingTime / 60, 1).' min) - '.$reason); + } + $cleanedCount++; + } + } + + if ($cleanedCount === 0) { + $this->info($isRestart ? ' No jobs to clean up' : ' No stuck jobs found (all jobs running normally)'); + } + + return $cleanedCount; + } } diff --git a/app/Console/Commands/Dev.php b/app/Console/Commands/Dev.php index 8f26d78ff..acc6dc2f9 100644 --- a/app/Console/Commands/Dev.php +++ b/app/Console/Commands/Dev.php @@ -4,6 +4,9 @@ use App\Jobs\CheckHelperImageJob; use App\Models\InstanceSettings; +use App\Models\ScheduledDatabaseBackupExecution; +use App\Models\ScheduledTaskExecution; +use Carbon\Carbon; use Illuminate\Console\Command; use Illuminate\Support\Facades\Artisan; @@ -45,6 +48,44 @@ public function init() } else { echo "Instance already initialized.\n"; } + + // Clean up stuck jobs and stale locks on development startup + try { + echo "Cleaning up Redis (stuck jobs and stale locks)...\n"; + Artisan::call('cleanup:redis', ['--restart' => true, '--clear-locks' => true]); + echo "Redis cleanup completed.\n"; + } catch (\Throwable $e) { + echo "Error in cleanup:redis: {$e->getMessage()}\n"; + } + + try { + $updatedTaskCount = ScheduledTaskExecution::where('status', 'running')->update([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - job was interrupted', + 'finished_at' 
=> Carbon::now(), + ]); + + if ($updatedTaskCount > 0) { + echo "Marked {$updatedTaskCount} stuck scheduled task executions as failed\n"; + } + } catch (\Throwable $e) { + echo "Could not cleanup stuck scheduled task executions: {$e->getMessage()}\n"; + } + + try { + $updatedBackupCount = ScheduledDatabaseBackupExecution::where('status', 'running')->update([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - job was interrupted', + 'finished_at' => Carbon::now(), + ]); + + if ($updatedBackupCount > 0) { + echo "Marked {$updatedBackupCount} stuck database backup executions as failed\n"; + } + } catch (\Throwable $e) { + echo "Could not cleanup stuck database backup executions: {$e->getMessage()}\n"; + } + CheckHelperImageJob::dispatch(); } } diff --git a/app/Console/Commands/Init.php b/app/Console/Commands/Init.php index 975a3c006..66cb77838 100644 --- a/app/Console/Commands/Init.php +++ b/app/Console/Commands/Init.php @@ -10,9 +10,12 @@ use App\Models\Environment; use App\Models\InstanceSettings; use App\Models\ScheduledDatabaseBackup; +use App\Models\ScheduledDatabaseBackupExecution; +use App\Models\ScheduledTaskExecution; use App\Models\Server; use App\Models\StandalonePostgresql; use App\Models\User; +use Carbon\Carbon; use Illuminate\Console\Command; use Illuminate\Support\Facades\Artisan; use Illuminate\Support\Facades\File; @@ -73,7 +76,7 @@ public function handle() $this->cleanupUnusedNetworkFromCoolifyProxy(); try { - $this->call('cleanup:redis', ['--clear-locks' => true]); + $this->call('cleanup:redis', ['--restart' => true, '--clear-locks' => true]); } catch (\Throwable $e) { echo "Error in cleanup:redis command: {$e->getMessage()}\n"; } @@ -103,6 +106,34 @@ public function handle() echo "Could not cleanup inprogress deployments: {$e->getMessage()}\n"; } + try { + $updatedTaskCount = ScheduledTaskExecution::where('status', 'running')->update([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - 
job was interrupted', + 'finished_at' => Carbon::now(), + ]); + + if ($updatedTaskCount > 0) { + echo "Marked {$updatedTaskCount} stuck scheduled task executions as failed\n"; + } + } catch (\Throwable $e) { + echo "Could not cleanup stuck scheduled task executions: {$e->getMessage()}\n"; + } + + try { + $updatedBackupCount = ScheduledDatabaseBackupExecution::where('status', 'running')->update([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - job was interrupted', + 'finished_at' => Carbon::now(), + ]); + + if ($updatedBackupCount > 0) { + echo "Marked {$updatedBackupCount} stuck database backup executions as failed\n"; + } + } catch (\Throwable $e) { + echo "Could not cleanup stuck database backup executions: {$e->getMessage()}\n"; + } + try { $localhost = $this->servers->where('id', 0)->first(); if ($localhost) { diff --git a/app/Jobs/CoolifyTask.php b/app/Jobs/CoolifyTask.php index 49a5ba8dd..d6dc6fa05 100755 --- a/app/Jobs/CoolifyTask.php +++ b/app/Jobs/CoolifyTask.php @@ -3,18 +3,35 @@ namespace App\Jobs; use App\Actions\CoolifyTask\RunRemoteProcess; +use App\Enums\ProcessStatus; use Illuminate\Bus\Queueable; use Illuminate\Contracts\Queue\ShouldBeEncrypted; use Illuminate\Contracts\Queue\ShouldQueue; use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Log; use Spatie\Activitylog\Models\Activity; class CoolifyTask implements ShouldBeEncrypted, ShouldQueue { use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; + /** + * The number of times the job may be attempted. + */ + public $tries = 3; + + /** + * The maximum number of unhandled exceptions to allow before failing. + */ + public $maxExceptions = 1; + + /** + * The number of seconds the job can run before timing out. + */ + public $timeout = 600; + /** * Create a new job instance. 
*/ @@ -42,4 +59,36 @@ public function handle(): void $remote_process(); } + + /** + * Calculate the number of seconds to wait before retrying the job. + */ + public function backoff(): array + { + return [30, 90, 180]; // 30s, 90s, 180s between retries + } + + /** + * Handle a job failure. + */ + public function failed(?\Throwable $exception): void + { + Log::channel('scheduled-errors')->error('CoolifyTask permanently failed', [ + 'job' => 'CoolifyTask', + 'activity_id' => $this->activity->id, + 'server_uuid' => $this->activity->getExtraProperty('server_uuid'), + 'command_preview' => substr($this->activity->getExtraProperty('command') ?? '', 0, 200), + 'error' => $exception?->getMessage(), + 'total_attempts' => $this->attempts(), + 'trace' => $exception?->getTraceAsString(), + ]); + + // Update activity status to reflect permanent failure + $this->activity->properties = $this->activity->properties->merge([ + 'status' => ProcessStatus::ERROR->value, + 'error' => $exception?->getMessage() ?? 'Job permanently failed', + 'failed_at' => now()->toIso8601String(), + ]); + $this->activity->save(); + } } diff --git a/app/Jobs/DatabaseBackupJob.php b/app/Jobs/DatabaseBackupJob.php index 45586f0d0..b28bce7cf 100644 --- a/app/Jobs/DatabaseBackupJob.php +++ b/app/Jobs/DatabaseBackupJob.php @@ -23,6 +23,7 @@ use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Log; use Illuminate\Support\Str; use Throwable; use Visus\Cuid2\Cuid2; @@ -31,6 +32,16 @@ class DatabaseBackupJob implements ShouldBeEncrypted, ShouldQueue { use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; + /** + * The number of times the job may be attempted. + */ + public $tries = 2; + + /** + * The maximum number of unhandled exceptions to allow before failing. 
+ */ + public $maxExceptions = 1; + public ?Team $team = null; public Server $server; @@ -74,7 +85,7 @@ class DatabaseBackupJob implements ShouldBeEncrypted, ShouldQueue public function __construct(public ScheduledDatabaseBackup $backup) { $this->onQueue('high'); - $this->timeout = $backup->timeout; + $this->timeout = max(60, (int) ($backup->timeout ?? 3600)); } public function handle(): void @@ -659,17 +670,42 @@ private function getFullImageName(): string return "{$helperImage}:{$latestVersion}"; } + /** + * Calculate the number of seconds to wait before retrying the job. + */ + public function backoff(): array + { + return [60, 300]; // 1min, 5min between retries + } + public function failed(?Throwable $exception): void { + Log::channel('scheduled-errors')->error('DatabaseBackup permanently failed', [ + 'job' => 'DatabaseBackupJob', + 'backup_id' => $this->backup->uuid, + 'database' => $this->database?->name ?? 'unknown', + 'database_type' => get_class($this->database ?? new \stdClass), + 'server' => $this->server?->name ?? 'unknown', + 'total_attempts' => $this->attempts(), + 'error' => $exception?->getMessage(), + 'trace' => $exception?->getTraceAsString(), + ]); + $log = ScheduledDatabaseBackupExecution::where('uuid', $this->backup_log_uuid)->first(); if ($log) { $log->update([ 'status' => 'failed', - 'message' => 'Job failed: '.($exception?->getMessage() ?? 'Unknown error'), + 'message' => 'Job permanently failed after '.$this->attempts().' attempts: '.($exception?->getMessage() ?? 
'Unknown error'), 'size' => 0, 'filename' => null, + 'finished_at' => Carbon::now(), ]); } + + // Notify team about permanent failure + if ($this->team) { + $this->team->notify(new BackupFailed($this->backup, $this->database, $this->backup_output)); + } } } diff --git a/app/Jobs/ScheduledJobManager.php b/app/Jobs/ScheduledJobManager.php index 9937444b8..75ff883c2 100644 --- a/app/Jobs/ScheduledJobManager.php +++ b/app/Jobs/ScheduledJobManager.php @@ -52,7 +52,7 @@ public function middleware(): array { return [ (new WithoutOverlapping('scheduled-job-manager')) - ->expireAfter(60) // Lock expires after 1 minute to prevent stale locks + ->expireAfter(90) // Lock expires after 90s to handle high-load environments with many tasks ->dontRelease(), // Don't re-queue on lock conflict ]; } diff --git a/app/Jobs/ScheduledTaskJob.php b/app/Jobs/ScheduledTaskJob.php index 609595356..1776d0d78 100644 --- a/app/Jobs/ScheduledTaskJob.php +++ b/app/Jobs/ScheduledTaskJob.php @@ -18,11 +18,27 @@ use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Log; class ScheduledTaskJob implements ShouldQueue { use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; + /** + * The number of times the job may be attempted. + */ + public $tries = 3; + + /** + * The maximum number of unhandled exceptions to allow before failing. + */ + public $maxExceptions = 1; + + /** + * The number of seconds the job can run before timing out. + */ + public $timeout = 300; + public Team $team; public Server $server; @@ -33,6 +49,11 @@ class ScheduledTaskJob implements ShouldQueue public ?ScheduledTaskExecution $task_log = null; + /** + * Store execution ID to survive job serialization for timeout handling. 
+ */ + protected ?int $executionId = null; + public string $task_status = 'failed'; public ?string $task_output = null; @@ -55,6 +76,9 @@ public function __construct($task) } $this->team = Team::findOrFail($task->team_id); $this->server_timezone = $this->getServerTimezone(); + + // Set timeout from task configuration + $this->timeout = $this->task->timeout ?? 300; } private function getServerTimezone(): string @@ -70,11 +94,18 @@ private function getServerTimezone(): string public function handle(): void { + $startTime = Carbon::now(); + try { $this->task_log = ScheduledTaskExecution::create([ 'scheduled_task_id' => $this->task->id, + 'started_at' => $startTime, + 'retry_count' => $this->attempts() - 1, ]); + // Store execution ID for timeout handling + $this->executionId = $this->task_log->id; + $this->server = $this->resource->destination->server; if ($this->resource->type() === 'application') { @@ -129,15 +160,101 @@ public function handle(): void 'message' => $this->task_output ?? $e->getMessage(), ]); } - $this->team?->notify(new TaskFailed($this->task, $e->getMessage())); + + // Log the error to the scheduled-errors channel + Log::channel('scheduled-errors')->error('ScheduledTask execution failed', [ + 'job' => 'ScheduledTaskJob', + 'task_id' => $this->task->uuid, + 'task_name' => $this->task->name, + 'server' => $this->server->name ?? 'unknown', + 'attempt' => $this->attempts(), + 'error' => $e->getMessage(), + ]); + + // Only notify and throw on final failure + + // Re-throw to trigger Laravel's retry mechanism with backoff throw $e; } finally { ScheduledTaskDone::dispatch($this->team->id); if ($this->task_log) { + $finishedAt = Carbon::now(); + $duration = round($startTime->floatDiffInSeconds($finishedAt), 2); + $this->task_log->update([ - 'finished_at' => Carbon::now()->toImmutable(), + 'finished_at' => $finishedAt->toImmutable(), + 'duration' => $duration, ]); } } } + + /** + * Calculate the number of seconds to wait before retrying the job. 
+ */ + public function backoff(): array + { + return [30, 60, 120]; // 30s, 60s, 120s between retries + } + + /** + * Handle a job failure. + */ + public function failed(?\Throwable $exception): void + { + Log::channel('scheduled-errors')->error('ScheduledTask permanently failed', [ + 'job' => 'ScheduledTaskJob', + 'task_id' => $this->task->uuid, + 'task_name' => $this->task->name, + 'server' => $this->server->name ?? 'unknown', + 'total_attempts' => $this->attempts(), + 'error' => $exception?->getMessage(), + 'trace' => $exception?->getTraceAsString(), + ]); + + // Reload execution log from database + // When a job times out, failed() is called in a fresh process with the original + // queue payload, so $executionId will be null. We need to query for the latest execution. + $execution = null; + + // Try to find execution using stored ID first (works for non-timeout failures) + if ($this->executionId) { + $execution = ScheduledTaskExecution::find($this->executionId); + } + + // If no stored ID or not found, query for the most recent execution log for this task + if (! $execution) { + $execution = ScheduledTaskExecution::query() + ->where('scheduled_task_id', $this->task->id) + ->orderBy('created_at', 'desc') + ->first(); + } + + // Last resort: check task_log property + if (! $execution && $this->task_log) { + $execution = $this->task_log; + } + + if ($execution) { + $errorMessage = 'Job permanently failed after '.$this->attempts().' 
attempts'; + if ($exception) { + $errorMessage .= ': '.$exception->getMessage(); + } + + $execution->update([ + 'status' => 'failed', + 'message' => $errorMessage, + 'error_details' => $exception?->getTraceAsString(), + 'finished_at' => Carbon::now()->toImmutable(), + ]); + } else { + Log::channel('scheduled-errors')->warning('Could not find execution log to update', [ + 'execution_id' => $this->executionId, + 'task_id' => $this->task->uuid, + ]); + } + + // Notify team about permanent failure + $this->team?->notify(new TaskFailed($this->task, $exception?->getMessage() ?? 'Unknown error')); + } } diff --git a/app/Livewire/Project/Database/BackupEdit.php b/app/Livewire/Project/Database/BackupEdit.php index 7deaa82a9..da543a049 100644 --- a/app/Livewire/Project/Database/BackupEdit.php +++ b/app/Livewire/Project/Database/BackupEdit.php @@ -79,7 +79,7 @@ class BackupEdit extends Component #[Validate(['required', 'boolean'])] public bool $dumpAll = false; - #[Validate(['required', 'int', 'min:1', 'max:36000'])] + #[Validate(['required', 'int', 'min:60', 'max:36000'])] public int $timeout = 3600; public function mount() diff --git a/app/Livewire/Project/Shared/ScheduledTask/Add.php b/app/Livewire/Project/Shared/ScheduledTask/Add.php index e4b666532..d7210c15d 100644 --- a/app/Livewire/Project/Shared/ScheduledTask/Add.php +++ b/app/Livewire/Project/Shared/ScheduledTask/Add.php @@ -34,11 +34,14 @@ class Add extends Component public ?string $container = ''; + public int $timeout = 300; + protected $rules = [ 'name' => 'required|string', 'command' => 'required|string', 'frequency' => 'required|string', 'container' => 'nullable|string', + 'timeout' => 'required|integer|min:60|max:3600', ]; protected $validationAttributes = [ @@ -46,6 +49,7 @@ class Add extends Component 'command' => 'command', 'frequency' => 'frequency', 'container' => 'container', + 'timeout' => 'timeout', ]; public function mount() @@ -103,6 +107,7 @@ public function saveScheduledTask() $task->command = 
$this->command; $task->frequency = $this->frequency; $task->container = $this->container; + $task->timeout = $this->timeout; $task->team_id = currentTeam()->id; switch ($this->type) { @@ -130,5 +135,6 @@ public function clear() $this->command = ''; $this->frequency = ''; $this->container = ''; + $this->timeout = 300; } } diff --git a/app/Livewire/Project/Shared/ScheduledTask/Show.php b/app/Livewire/Project/Shared/ScheduledTask/Show.php index c8d07ae36..920a0efe5 100644 --- a/app/Livewire/Project/Shared/ScheduledTask/Show.php +++ b/app/Livewire/Project/Shared/ScheduledTask/Show.php @@ -40,6 +40,9 @@ class Show extends Component #[Validate(['string', 'nullable'])] public ?string $container = null; + #[Validate(['integer', 'required', 'min:60', 'max:3600'])] + public int $timeout = 300; + #[Locked] public ?string $application_uuid; @@ -99,6 +102,7 @@ public function syncData(bool $toModel = false) $this->task->command = str($this->command)->trim()->value(); $this->task->frequency = str($this->frequency)->trim()->value(); $this->task->container = str($this->container)->trim()->value(); + $this->task->timeout = $this->timeout; $this->task->save(); } else { $this->isEnabled = $this->task->enabled; @@ -106,6 +110,7 @@ public function syncData(bool $toModel = false) $this->command = $this->task->command; $this->frequency = $this->task->frequency; $this->container = $this->task->container; + $this->timeout = $this->task->timeout ?? 
300; } } diff --git a/app/Models/ScheduledTask.php b/app/Models/ScheduledTask.php index 06903ffb6..bada0b7a5 100644 --- a/app/Models/ScheduledTask.php +++ b/app/Models/ScheduledTask.php @@ -12,6 +12,14 @@ class ScheduledTask extends BaseModel protected $guarded = []; + protected function casts(): array + { + return [ + 'enabled' => 'boolean', + 'timeout' => 'integer', + ]; + } + public function service() { return $this->belongsTo(Service::class); diff --git a/app/Models/ScheduledTaskExecution.php b/app/Models/ScheduledTaskExecution.php index de13fefb0..02fd6917a 100644 --- a/app/Models/ScheduledTaskExecution.php +++ b/app/Models/ScheduledTaskExecution.php @@ -8,6 +8,16 @@ class ScheduledTaskExecution extends BaseModel { protected $guarded = []; + protected function casts(): array + { + return [ + 'started_at' => 'datetime', + 'finished_at' => 'datetime', + 'retry_count' => 'integer', + 'duration' => 'decimal:2', + ]; + } + public function scheduledTask(): BelongsTo { return $this->belongsTo(ScheduledTask::class); diff --git a/config/logging.php b/config/logging.php index 488327414..1a75978f3 100644 --- a/config/logging.php +++ b/config/logging.php @@ -129,8 +129,8 @@ 'scheduled-errors' => [ 'driver' => 'daily', 'path' => storage_path('logs/scheduled-errors.log'), - 'level' => 'debug', - 'days' => 7, + 'level' => 'warning', + 'days' => 14, ], ], diff --git a/database/migrations/2025_11_09_000001_add_timeout_to_scheduled_tasks_table.php b/database/migrations/2025_11_09_000001_add_timeout_to_scheduled_tasks_table.php new file mode 100644 index 000000000..067861e16 --- /dev/null +++ b/database/migrations/2025_11_09_000001_add_timeout_to_scheduled_tasks_table.php @@ -0,0 +1,28 @@ +integer('timeout')->default(300)->after('frequency'); + }); + } + + /** + * Reverse the migrations. 
+ */ + public function down(): void + { + Schema::table('scheduled_tasks', function (Blueprint $table) { + $table->dropColumn('timeout'); + }); + } +}; diff --git a/database/migrations/2025_11_09_000002_improve_scheduled_task_executions_tracking.php b/database/migrations/2025_11_09_000002_improve_scheduled_task_executions_tracking.php new file mode 100644 index 000000000..14fdd5998 --- /dev/null +++ b/database/migrations/2025_11_09_000002_improve_scheduled_task_executions_tracking.php @@ -0,0 +1,31 @@ +timestamp('started_at')->nullable()->after('scheduled_task_id'); + $table->integer('retry_count')->default(0)->after('status'); + $table->decimal('duration', 10, 2)->nullable()->after('retry_count')->comment('Duration in seconds'); + $table->text('error_details')->nullable()->after('message'); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::table('scheduled_task_executions', function (Blueprint $table) { + $table->dropColumn(['started_at', 'retry_count', 'duration', 'error_details']); + }); + } +}; diff --git a/resources/views/livewire/project/shared/scheduled-task/add.blade.php b/resources/views/livewire/project/shared/scheduled-task/add.blade.php index 0c4b8a4d6..6fa04c28b 100644 --- a/resources/views/livewire/project/shared/scheduled-task/add.blade.php +++ b/resources/views/livewire/project/shared/scheduled-task/add.blade.php @@ -4,6 +4,9 @@ + @if ($type === 'application') @if ($containerNames->count() > 1) diff --git a/resources/views/livewire/project/shared/scheduled-task/show.blade.php b/resources/views/livewire/project/shared/scheduled-task/show.blade.php index 1ede7775a..fa2ce0ad9 100644 --- a/resources/views/livewire/project/shared/scheduled-task/show.blade.php +++ b/resources/views/livewire/project/shared/scheduled-task/show.blade.php @@ -35,6 +35,8 @@ + @if ($type === 'application') first(); + + if (! 
$server) { + $this->markTestSkipped('No servers available for testing'); + } + + Queue::fake(); + + // Create an activity for the task + $activity = activity() + ->withProperties([ + 'server_uuid' => $server->uuid, + 'command' => 'echo "test"', + 'type' => 'inline', + ]) + ->event('inline') + ->log('[]'); + + // Dispatch the job + CoolifyTask::dispatch( + activity: $activity, + ignore_errors: false, + call_event_on_finish: null, + call_event_data: null + ); + + // Assert job was dispatched + Queue::assertPushed(CoolifyTask::class); +}); + +it('has correct retry configuration on CoolifyTask', function () { + $server = Server::where('ip', '!=', '1.2.3.4')->first(); + + if (! $server) { + $this->markTestSkipped('No servers available for testing'); + } + + $activity = activity() + ->withProperties([ + 'server_uuid' => $server->uuid, + 'command' => 'echo "test"', + 'type' => 'inline', + ]) + ->event('inline') + ->log('[]'); + + $job = new CoolifyTask( + activity: $activity, + ignore_errors: false, + call_event_on_finish: null, + call_event_data: null + ); + + // Assert retry configuration + expect($job->tries)->toBe(3); + expect($job->maxExceptions)->toBe(1); + expect($job->timeout)->toBe(600); + expect($job->backoff())->toBe([30, 90, 180]); +}); diff --git a/tests/Feature/StartupExecutionCleanupTest.php b/tests/Feature/StartupExecutionCleanupTest.php new file mode 100644 index 000000000..3a6b00208 --- /dev/null +++ b/tests/Feature/StartupExecutionCleanupTest.php @@ -0,0 +1,216 @@ +create(); + + // Create a scheduled task + $scheduledTask = ScheduledTask::factory()->create([ + 'team_id' => $team->id, + ]); + + // Create multiple task executions with 'running' status + $runningExecution1 = ScheduledTaskExecution::create([ + 'scheduled_task_id' => $scheduledTask->id, + 'status' => 'running', + 'started_at' => Carbon::now()->subMinutes(10), + ]); + + $runningExecution2 = ScheduledTaskExecution::create([ + 'scheduled_task_id' => $scheduledTask->id, + 'status' => 'running', 
+ 'started_at' => Carbon::now()->subMinutes(5), + ]); + + // Create a completed execution (should not be affected) + $completedExecution = ScheduledTaskExecution::create([ + 'scheduled_task_id' => $scheduledTask->id, + 'status' => 'success', + 'started_at' => Carbon::now()->subMinutes(15), + 'finished_at' => Carbon::now()->subMinutes(14), + ]); + + // Run the app:init command + Artisan::call('app:init'); + + // Refresh models from database + $runningExecution1->refresh(); + $runningExecution2->refresh(); + $completedExecution->refresh(); + + // Assert running executions are now failed + expect($runningExecution1->status)->toBe('failed') + ->and($runningExecution1->message)->toBe('Marked as failed during Coolify startup - job was interrupted') + ->and($runningExecution1->finished_at)->not->toBeNull() + ->and($runningExecution1->finished_at->toDateTimeString())->toBe('2025-01-15 12:00:00'); + + expect($runningExecution2->status)->toBe('failed') + ->and($runningExecution2->message)->toBe('Marked as failed during Coolify startup - job was interrupted') + ->and($runningExecution2->finished_at)->not->toBeNull(); + + // Assert completed execution is unchanged + expect($completedExecution->status)->toBe('success') + ->and($completedExecution->message)->toBeNull(); + + // Assert NO notifications were sent + Notification::assertNothingSent(); +}); + +test('app:init marks stuck database backup executions as failed', function () { + // Create a team for the scheduled backup + $team = Team::factory()->create(); + + // Create a database + $database = StandalonePostgresql::factory()->create([ + 'team_id' => $team->id, + ]); + + // Create a scheduled backup + $scheduledBackup = ScheduledDatabaseBackup::factory()->create([ + 'team_id' => $team->id, + 'database_id' => $database->id, + 'database_type' => StandalonePostgresql::class, + ]); + + // Create multiple backup executions with 'running' status + $runningBackup1 = ScheduledDatabaseBackupExecution::create([ + 
'scheduled_database_backup_id' => $scheduledBackup->id, + 'status' => 'running', + 'database_name' => 'test_db', + ]); + + $runningBackup2 = ScheduledDatabaseBackupExecution::create([ + 'scheduled_database_backup_id' => $scheduledBackup->id, + 'status' => 'running', + 'database_name' => 'test_db_2', + ]); + + // Create a successful backup (should not be affected) + $successfulBackup = ScheduledDatabaseBackupExecution::create([ + 'scheduled_database_backup_id' => $scheduledBackup->id, + 'status' => 'success', + 'database_name' => 'test_db_3', + 'finished_at' => Carbon::now()->subMinutes(20), + ]); + + // Run the app:init command + Artisan::call('app:init'); + + // Refresh models from database + $runningBackup1->refresh(); + $runningBackup2->refresh(); + $successfulBackup->refresh(); + + // Assert running backups are now failed + expect($runningBackup1->status)->toBe('failed') + ->and($runningBackup1->message)->toBe('Marked as failed during Coolify startup - job was interrupted') + ->and($runningBackup1->finished_at)->not->toBeNull() + ->and($runningBackup1->finished_at->toDateTimeString())->toBe('2025-01-15 12:00:00'); + + expect($runningBackup2->status)->toBe('failed') + ->and($runningBackup2->message)->toBe('Marked as failed during Coolify startup - job was interrupted') + ->and($runningBackup2->finished_at)->not->toBeNull(); + + // Assert successful backup is unchanged + expect($successfulBackup->status)->toBe('success') + ->and($successfulBackup->message)->toBeNull(); + + // Assert NO notifications were sent + Notification::assertNothingSent(); +}); + +test('app:init handles cleanup when no stuck executions exist', function () { + // Create a team + $team = Team::factory()->create(); + + // Create a scheduled task + $scheduledTask = ScheduledTask::factory()->create([ + 'team_id' => $team->id, + ]); + + // Create only completed executions + ScheduledTaskExecution::create([ + 'scheduled_task_id' => $scheduledTask->id, + 'status' => 'success', + 'started_at' => 
Carbon::now()->subMinutes(10), + 'finished_at' => Carbon::now()->subMinutes(9), + ]); + + ScheduledTaskExecution::create([ + 'scheduled_task_id' => $scheduledTask->id, + 'status' => 'failed', + 'started_at' => Carbon::now()->subMinutes(20), + 'finished_at' => Carbon::now()->subMinutes(19), + ]); + + // Run the app:init command (should not fail) + $exitCode = Artisan::call('app:init'); + + // Assert command succeeded + expect($exitCode)->toBe(0); + + // Assert all executions remain unchanged + expect(ScheduledTaskExecution::where('status', 'running')->count())->toBe(0) + ->and(ScheduledTaskExecution::where('status', 'success')->count())->toBe(1) + ->and(ScheduledTaskExecution::where('status', 'failed')->count())->toBe(1); + + // Assert NO notifications were sent + Notification::assertNothingSent(); +}); + +test('cleanup does not send notifications even when team has notification settings', function () { + // Create a team with notification settings enabled + $team = Team::factory()->create([ + 'smtp_enabled' => true, + 'smtp_from_address' => 'test@example.com', + ]); + + // Create a scheduled task + $scheduledTask = ScheduledTask::factory()->create([ + 'team_id' => $team->id, + ]); + + // Create a running execution + $runningExecution = ScheduledTaskExecution::create([ + 'scheduled_task_id' => $scheduledTask->id, + 'status' => 'running', + 'started_at' => Carbon::now()->subMinutes(5), + ]); + + // Run the app:init command + Artisan::call('app:init'); + + // Refresh model + $runningExecution->refresh(); + + // Assert execution is failed + expect($runningExecution->status)->toBe('failed'); + + // Assert NO notifications were sent despite team having notification settings + Notification::assertNothingSent(); +}); diff --git a/tests/Unit/ScheduledJobsRetryConfigTest.php b/tests/Unit/ScheduledJobsRetryConfigTest.php new file mode 100644 index 000000000..f46cb9fd1 --- /dev/null +++ b/tests/Unit/ScheduledJobsRetryConfigTest.php @@ -0,0 +1,77 @@ 
+hasProperty('tries'))->toBeTrue() + ->and($reflection->hasProperty('maxExceptions'))->toBeTrue() + ->and($reflection->hasProperty('timeout'))->toBeTrue() + ->and($reflection->hasMethod('backoff'))->toBeTrue(); + + // Get default values from class definition + $defaultProperties = $reflection->getDefaultProperties(); + + expect($defaultProperties['tries'])->toBe(3) + ->and($defaultProperties['maxExceptions'])->toBe(1) + ->and($defaultProperties['timeout'])->toBe(600); +}); + +it('ScheduledTaskJob has correct retry properties defined', function () { + $reflection = new ReflectionClass(ScheduledTaskJob::class); + + // Check public properties exist + expect($reflection->hasProperty('tries'))->toBeTrue() + ->and($reflection->hasProperty('maxExceptions'))->toBeTrue() + ->and($reflection->hasProperty('timeout'))->toBeTrue() + ->and($reflection->hasMethod('backoff'))->toBeTrue() + ->and($reflection->hasMethod('failed'))->toBeTrue(); + + // Get default values from class definition + $defaultProperties = $reflection->getDefaultProperties(); + + expect($defaultProperties['tries'])->toBe(3) + ->and($defaultProperties['maxExceptions'])->toBe(1) + ->and($defaultProperties['timeout'])->toBe(300); +}); + +it('DatabaseBackupJob has correct retry properties defined', function () { + $reflection = new ReflectionClass(DatabaseBackupJob::class); + + // Check public properties exist + expect($reflection->hasProperty('tries'))->toBeTrue() + ->and($reflection->hasProperty('maxExceptions'))->toBeTrue() + ->and($reflection->hasProperty('timeout'))->toBeTrue() + ->and($reflection->hasMethod('backoff'))->toBeTrue() + ->and($reflection->hasMethod('failed'))->toBeTrue(); + + // Get default values from class definition + $defaultProperties = $reflection->getDefaultProperties(); + + expect($defaultProperties['tries'])->toBe(2) + ->and($defaultProperties['maxExceptions'])->toBe(1) + ->and($defaultProperties['timeout'])->toBe(3600); +}); + +it('DatabaseBackupJob enforces minimum timeout of 60 
seconds', function () { + // Read the constructor to verify minimum timeout enforcement + $reflection = new ReflectionClass(DatabaseBackupJob::class); + $constructor = $reflection->getMethod('__construct'); + + // Get the constructor source + $filename = $reflection->getFileName(); + $startLine = $constructor->getStartLine(); + $endLine = $constructor->getEndLine(); + + $source = file($filename); + $constructorSource = implode('', array_slice($source, $startLine - 1, $endLine - $startLine + 1)); + + // Verify the implementation enforces minimum of 60 seconds + expect($constructorSource) + ->toContain('max(') + ->toContain('60'); +}); diff --git a/tests/Unit/ScheduledTaskJobTimeoutTest.php b/tests/Unit/ScheduledTaskJobTimeoutTest.php new file mode 100644 index 000000000..99117fbca --- /dev/null +++ b/tests/Unit/ScheduledTaskJobTimeoutTest.php @@ -0,0 +1,96 @@ +hasProperty('executionId'))->toBeTrue(); + + // Verify it's protected (will be serialized with the job) + $property = $reflection->getProperty('executionId'); + expect($property->isProtected())->toBeTrue(); +}); + +it('has failed method that handles job failures', function () { + $reflection = new ReflectionClass(ScheduledTaskJob::class); + + // Verify failed() method exists + expect($reflection->hasMethod('failed'))->toBeTrue(); + + // Verify it accepts a Throwable parameter + $method = $reflection->getMethod('failed'); + $parameters = $method->getParameters(); + + expect($parameters)->toHaveCount(1); + expect($parameters[0]->getName())->toBe('exception'); + expect($parameters[0]->allowsNull())->toBeTrue(); +}); + +it('failed method implementation reloads execution from database', function () { + // Read the failed() method source code to verify it reloads from database + $reflection = new ReflectionClass(ScheduledTaskJob::class); + $method = $reflection->getMethod('failed'); + + // Get the file and method source + $filename = $reflection->getFileName(); + $startLine = $method->getStartLine(); + $endLine = 
$method->getEndLine(); + + $source = file($filename); + $methodSource = implode('', array_slice($source, $startLine - 1, $endLine - $startLine + 1)); + + // Verify the implementation includes reloading from database + expect($methodSource) + ->toContain('$this->executionId') + ->toContain('ScheduledTaskExecution::find') + ->toContain('ScheduledTaskExecution::query') + ->toContain('scheduled_task_id') + ->toContain('orderBy') + ->toContain('status') + ->toContain('failed') + ->toContain('notify'); +}); + +it('failed method updates execution with error_details field', function () { + // Read the failed() method source code to verify error_details is populated + $reflection = new ReflectionClass(ScheduledTaskJob::class); + $method = $reflection->getMethod('failed'); + + // Get the file and method source + $filename = $reflection->getFileName(); + $startLine = $method->getStartLine(); + $endLine = $method->getEndLine(); + + $source = file($filename); + $methodSource = implode('', array_slice($source, $startLine - 1, $endLine - $startLine + 1)); + + // Verify the implementation populates error_details field + expect($methodSource)->toContain('error_details'); +}); + +it('failed method logs when execution cannot be found', function () { + // Read the failed() method source code to verify defensive logging + $reflection = new ReflectionClass(ScheduledTaskJob::class); + $method = $reflection->getMethod('failed'); + + // Get the file and method source + $filename = $reflection->getFileName(); + $startLine = $method->getStartLine(); + $endLine = $method->getEndLine(); + + $source = file($filename); + $methodSource = implode('', array_slice($source, $startLine - 1, $endLine - $startLine + 1)); + + // Verify the implementation logs a warning if execution is not found + expect($methodSource) + ->toContain('Could not find execution log') + ->toContain('warning'); +}); diff --git a/tests/Unit/StartupExecutionCleanupTest.php b/tests/Unit/StartupExecutionCleanupTest.php new file 
mode 100644 index 000000000..1fae590eb --- /dev/null +++ b/tests/Unit/StartupExecutionCleanupTest.php @@ -0,0 +1,116 @@ +shouldReceive('where') + ->once() + ->with('status', 'running') + ->andReturnSelf(); + + // Expect update to be called with correct parameters + $mockBuilder->shouldReceive('update') + ->once() + ->with([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - job was interrupted', + 'finished_at' => Carbon::now(), + ]) + ->andReturn(2); // Simulate 2 records updated + + // Execute the cleanup logic directly + $updatedCount = ScheduledTaskExecution::where('status', 'running')->update([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - job was interrupted', + 'finished_at' => Carbon::now(), + ]); + + // Assert the count is correct + expect($updatedCount)->toBe(2); +}); + +it('marks stuck database backup executions as failed without triggering notifications', function () { + // Mock the ScheduledDatabaseBackupExecution model + $mockBuilder = \Mockery::mock('alias:'.ScheduledDatabaseBackupExecution::class); + + // Expect where clause to be called with 'running' status + $mockBuilder->shouldReceive('where') + ->once() + ->with('status', 'running') + ->andReturnSelf(); + + // Expect update to be called with correct parameters + $mockBuilder->shouldReceive('update') + ->once() + ->with([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - job was interrupted', + 'finished_at' => Carbon::now(), + ]) + ->andReturn(3); // Simulate 3 records updated + + // Execute the cleanup logic directly + $updatedCount = ScheduledDatabaseBackupExecution::where('status', 'running')->update([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - job was interrupted', + 'finished_at' => Carbon::now(), + ]); + + // Assert the count is correct + expect($updatedCount)->toBe(3); +}); + +it('handles cleanup when no stuck executions exist', function () { + // Mock 
the ScheduledTaskExecution model + $mockBuilder = \Mockery::mock('alias:'.ScheduledTaskExecution::class); + + $mockBuilder->shouldReceive('where') + ->once() + ->with('status', 'running') + ->andReturnSelf(); + + $mockBuilder->shouldReceive('update') + ->once() + ->andReturn(0); // No records updated + + $updatedCount = ScheduledTaskExecution::where('status', 'running')->update([ + 'status' => 'failed', + 'message' => 'Marked as failed during Coolify startup - job was interrupted', + 'finished_at' => Carbon::now(), + ]); + + expect($updatedCount)->toBe(0); +}); + +it('uses correct failure message for interrupted jobs', function () { + $expectedMessage = 'Marked as failed during Coolify startup - job was interrupted'; + + // Verify the message clearly indicates the job was interrupted during startup + expect($expectedMessage) + ->toContain('Coolify startup') + ->toContain('interrupted') + ->toContain('failed'); +}); + +it('sets finished_at timestamp when marking executions as failed', function () { + $now = Carbon::now(); + + // Verify Carbon::now() is used for finished_at + expect($now)->toBeInstanceOf(Carbon::class) + ->and($now->toDateTimeString())->toBe('2025-01-15 12:00:00'); +});