diff --git a/app/Console/Commands/CleanupRedis.php b/app/Console/Commands/CleanupRedis.php
index f6a2de75b..a5fdc33e0 100644
--- a/app/Console/Commands/CleanupRedis.php
+++ b/app/Console/Commands/CleanupRedis.php
@@ -7,7 +7,7 @@
class CleanupRedis extends Command
{
- protected $signature = 'cleanup:redis {--dry-run : Show what would be deleted without actually deleting} {--skip-overlapping : Skip overlapping queue cleanup} {--clear-locks : Clear stale WithoutOverlapping locks}';
+ protected $signature = 'cleanup:redis {--dry-run : Show what would be deleted without actually deleting} {--skip-overlapping : Skip overlapping queue cleanup} {--clear-locks : Clear stale WithoutOverlapping locks} {--restart : Aggressive cleanup mode for system restart (marks all processing jobs as failed)}';
protected $description = 'Cleanup Redis (Horizon jobs, metrics, overlapping queues, cache locks, and related data)';
@@ -63,6 +63,14 @@ public function handle()
$deletedCount += $locksCleaned;
}
+ // Clean up stuck jobs (restart mode = aggressive, runtime mode = conservative)
+ $isRestart = $this->option('restart');
+ if ($isRestart || $this->option('clear-locks')) {
+ $this->info($isRestart ? 'Cleaning up stuck jobs (RESTART MODE - aggressive)...' : 'Checking for stuck jobs (runtime mode - conservative)...');
+ $jobsCleaned = $this->cleanupStuckJobs($redis, $prefix, $dryRun, $isRestart);
+ $deletedCount += $jobsCleaned;
+ }
+
if ($dryRun) {
$this->info("DRY RUN: Would delete {$deletedCount} out of {$totalKeys} keys");
} else {
@@ -332,4 +340,98 @@ private function cleanupCacheLocks(bool $dryRun): int
return $cleanedCount;
}
+
+    /**
+     * Mark Horizon jobs that are stuck in a "processing"/"reserved" state as failed.
+     *
+     * Restart mode fails every such job. Runtime mode only fails jobs that have
+     * been running for more than 12 hours and never touches
+     * ApplicationDeploymentJob or DatabaseBackupJob. In dry-run mode nothing is
+     * written, but matching jobs are still reported and counted.
+     *
+     * @param  mixed  $redis  Redis connection
+     * @param  string  $prefix  Horizon prefix
+     * @param  bool  $dryRun  Dry run mode
+     * @param  bool  $isRestart  Restart mode (aggressive) vs runtime mode (conservative)
+     * @return int Number of jobs cleaned (or that would be cleaned in dry-run mode)
+     */
+    private function cleanupStuckJobs($redis, string $prefix, bool $dryRun, bool $isRestart): int
+    {
+        $cleanedCount = 0;
+        $now = time();
+        $prefixLength = strlen($prefix);
+        $stuckAfterSeconds = 43200; // 12 hours
+        $longRunningJobs = ['ApplicationDeploymentJob', 'DatabaseBackupJob'];
+
+        foreach ($redis->keys('*') as $key) {
+            // Strip only a leading prefix; str_replace() would also remove any later occurrence inside the key
+            $keyWithoutPrefix = ($prefixLength > 0 && str_starts_with($key, $prefix))
+                ? substr($key, $prefixLength)
+                : $key;
+
+            // Only process hash-type keys (individual jobs); 5 is the value phpredis reports for hashes
+            if ($redis->command('type', [$keyWithoutPrefix]) !== 5) {
+                continue;
+            }
+
+            $data = $redis->command('hgetall', [$keyWithoutPrefix]);
+
+            // Only process jobs in "processing" or "reserved" state
+            if (! in_array(data_get($data, 'status'), ['processing', 'reserved'], true)) {
+                continue;
+            }
+
+            // A missing payload must not reach json_decode(): passing null there is deprecated
+            $payload = data_get($data, 'payload');
+            $payloadData = is_string($payload) ? json_decode($payload, true) : null;
+
+            $displayName = data_get($payloadData, 'displayName');
+            $jobClass = is_string($displayName) && $displayName !== '' ? $displayName : 'Unknown';
+
+            // Work out when the job started. reserved_at (worker pick-up time) is preferred because
+            // a push time also counts time spent waiting in the queue.
+            // NOTE(review): Horizon is expected to store reserved_at on the hash and pushedAt inside
+            // the payload - confirm against the installed Horizon version. pushed_at is still read
+            // because the previous implementation relied on it.
+            $startedAt = 0;
+            foreach ([
+                data_get($data, 'reserved_at'),
+                data_get($data, 'pushed_at'),
+                data_get($payloadData, 'pushedAt'),
+            ] as $timestamp) {
+                if (is_numeric($timestamp) && (int) $timestamp > 0) {
+                    $startedAt = (int) $timestamp;
+                    break;
+                }
+            }
+
+            $processingTime = $startedAt > 0 ? $now - $startedAt : null;
+            $elapsed = $processingTime === null ? 'unknown time' : round($processingTime / 60, 1).' min';
+
+            if ($isRestart) {
+                // RESTART MODE: Mark ALL processing/reserved jobs as failed
+                // Safe because all workers are dead on restart
+                $reason = 'System restart - all workers terminated';
+            } else {
+                // RUNTIME MODE: Only mark truly stuck jobs as failed
+                // Deployments (dynamic timeout) and database backups can legitimately run for hours
+                foreach ($longRunningJobs as $longRunningJob) {
+                    if (str_contains($jobClass, $longRunningJob)) {
+                        continue 2;
+                    }
+                }
+
+                // Without a usable timestamp the age is unknown. Treating it as epoch 0 would fail
+                // every such job, so stay conservative and leave the job alone.
+                if ($processingTime === null || $processingTime <= $stuckAfterSeconds) {
+                    continue;
+                }
+
+                $reason = 'Processing for more than 12 hours';
+            }
+
+            if ($dryRun) {
+                $this->warn(" Would mark as FAILED: {$jobClass} (processing for {$elapsed}) - {$reason}");
+            } else {
+                // Mark job as failed
+                $redis->command('hset', [$keyWithoutPrefix, 'status', 'failed']);
+                $redis->command('hset', [$keyWithoutPrefix, 'failed_at', $now]);
+                $redis->command('hset', [$keyWithoutPrefix, 'exception', "Job cleaned up by cleanup:redis - {$reason}"]);
+
+                $this->info(" ✓ Marked as FAILED: {$jobClass} (processing for {$elapsed}) - {$reason}");
+            }
+            $cleanedCount++;
+        }
+
+        if ($cleanedCount === 0) {
+            $this->info($isRestart ? ' No jobs to clean up' : ' No stuck jobs found (all jobs running normally)');
+        }
+
+        return $cleanedCount;
+    }
}
diff --git a/app/Console/Commands/Dev.php b/app/Console/Commands/Dev.php
index 8f26d78ff..acc6dc2f9 100644
--- a/app/Console/Commands/Dev.php
+++ b/app/Console/Commands/Dev.php
@@ -4,6 +4,9 @@
use App\Jobs\CheckHelperImageJob;
use App\Models\InstanceSettings;
+use App\Models\ScheduledDatabaseBackupExecution;
+use App\Models\ScheduledTaskExecution;
+use Carbon\Carbon;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Artisan;
@@ -45,6 +48,44 @@ public function init()
} else {
echo "Instance already initialized.\n";
}
+
+ // Clean up stuck jobs and stale locks on development startup
+ try {
+ echo "Cleaning up Redis (stuck jobs and stale locks)...\n";
+ Artisan::call('cleanup:redis', ['--restart' => true, '--clear-locks' => true]);
+ echo "Redis cleanup completed.\n";
+ } catch (\Throwable $e) {
+ echo "Error in cleanup:redis: {$e->getMessage()}\n";
+ }
+
+ try {
+ $updatedTaskCount = ScheduledTaskExecution::where('status', 'running')->update([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ]);
+
+ if ($updatedTaskCount > 0) {
+ echo "Marked {$updatedTaskCount} stuck scheduled task executions as failed\n";
+ }
+ } catch (\Throwable $e) {
+ echo "Could not cleanup stuck scheduled task executions: {$e->getMessage()}\n";
+ }
+
+ try {
+ $updatedBackupCount = ScheduledDatabaseBackupExecution::where('status', 'running')->update([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ]);
+
+ if ($updatedBackupCount > 0) {
+ echo "Marked {$updatedBackupCount} stuck database backup executions as failed\n";
+ }
+ } catch (\Throwable $e) {
+ echo "Could not cleanup stuck database backup executions: {$e->getMessage()}\n";
+ }
+
CheckHelperImageJob::dispatch();
}
}
diff --git a/app/Console/Commands/Init.php b/app/Console/Commands/Init.php
index 975a3c006..66cb77838 100644
--- a/app/Console/Commands/Init.php
+++ b/app/Console/Commands/Init.php
@@ -10,9 +10,12 @@
use App\Models\Environment;
use App\Models\InstanceSettings;
use App\Models\ScheduledDatabaseBackup;
+use App\Models\ScheduledDatabaseBackupExecution;
+use App\Models\ScheduledTaskExecution;
use App\Models\Server;
use App\Models\StandalonePostgresql;
use App\Models\User;
+use Carbon\Carbon;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Artisan;
use Illuminate\Support\Facades\File;
@@ -73,7 +76,7 @@ public function handle()
$this->cleanupUnusedNetworkFromCoolifyProxy();
try {
- $this->call('cleanup:redis', ['--clear-locks' => true]);
+ $this->call('cleanup:redis', ['--restart' => true, '--clear-locks' => true]);
} catch (\Throwable $e) {
echo "Error in cleanup:redis command: {$e->getMessage()}\n";
}
@@ -103,6 +106,34 @@ public function handle()
echo "Could not cleanup inprogress deployments: {$e->getMessage()}\n";
}
+ try {
+ $updatedTaskCount = ScheduledTaskExecution::where('status', 'running')->update([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ]);
+
+ if ($updatedTaskCount > 0) {
+ echo "Marked {$updatedTaskCount} stuck scheduled task executions as failed\n";
+ }
+ } catch (\Throwable $e) {
+ echo "Could not cleanup stuck scheduled task executions: {$e->getMessage()}\n";
+ }
+
+ try {
+ $updatedBackupCount = ScheduledDatabaseBackupExecution::where('status', 'running')->update([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ]);
+
+ if ($updatedBackupCount > 0) {
+ echo "Marked {$updatedBackupCount} stuck database backup executions as failed\n";
+ }
+ } catch (\Throwable $e) {
+ echo "Could not cleanup stuck database backup executions: {$e->getMessage()}\n";
+ }
+
try {
$localhost = $this->servers->where('id', 0)->first();
if ($localhost) {
diff --git a/app/Jobs/CoolifyTask.php b/app/Jobs/CoolifyTask.php
index 49a5ba8dd..d6dc6fa05 100755
--- a/app/Jobs/CoolifyTask.php
+++ b/app/Jobs/CoolifyTask.php
@@ -3,18 +3,35 @@
namespace App\Jobs;
use App\Actions\CoolifyTask\RunRemoteProcess;
+use App\Enums\ProcessStatus;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldBeEncrypted;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
+use Illuminate\Support\Facades\Log;
use Spatie\Activitylog\Models\Activity;
class CoolifyTask implements ShouldBeEncrypted, ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
+ /**
+ * The number of times the job may be attempted.
+ */
+ public $tries = 3;
+
+ /**
+ * The maximum number of unhandled exceptions to allow before failing.
+ *
+ * NOTE(review): with a limit of 1 the first unhandled exception appears to fail the job
+ * permanently, so the remaining attempts would only be used after timeouts or manual
+ * releases - confirm this is the intended retry behaviour.
+ */
+ public $maxExceptions = 1;
+
+ /**
+ * The number of seconds the job can run before timing out.
+ */
+ public $timeout = 600;
+
/**
* Create a new job instance.
*/
@@ -42,4 +59,36 @@ public function handle(): void
$remote_process();
}
+
+    /**
+     * Delays, in seconds, to wait before each successive retry of the job.
+     *
+     * @return array<int, int>
+     */
+    public function backoff(): array
+    {
+        return [
+            30,
+            90,
+            180,
+        ];
+    }
+
+    /**
+     * Handle a job failure.
+     *
+     * Writes the failure to the scheduled-errors log channel and records an
+     * error status, message and timestamp on the activity's properties.
+     *
+     * @param  \Throwable|null  $exception  Exception that caused the failure, if any
+     */
+    public function failed(?\Throwable $exception): void
+    {
+        // Cut the preview by characters rather than bytes so a multibyte character is never split
+        $commandPreview = mb_substr($this->activity->getExtraProperty('command') ?? '', 0, 200);
+
+        Log::channel('scheduled-errors')->error('CoolifyTask permanently failed', [
+            'job' => 'CoolifyTask',
+            'activity_id' => $this->activity->id,
+            'server_uuid' => $this->activity->getExtraProperty('server_uuid'),
+            'command_preview' => $commandPreview,
+            'error' => $exception?->getMessage(),
+            'total_attempts' => $this->attempts(),
+            'trace' => $exception?->getTraceAsString(),
+        ]);
+
+        // Update activity status to reflect permanent failure
+        $this->activity->properties = $this->activity->properties->merge([
+            'status' => ProcessStatus::ERROR->value,
+            'error' => $exception?->getMessage() ?? 'Job permanently failed',
+            'failed_at' => now()->toIso8601String(),
+        ]);
+        $this->activity->save();
+    }
}
diff --git a/app/Jobs/DatabaseBackupJob.php b/app/Jobs/DatabaseBackupJob.php
index 45586f0d0..b28bce7cf 100644
--- a/app/Jobs/DatabaseBackupJob.php
+++ b/app/Jobs/DatabaseBackupJob.php
@@ -23,6 +23,7 @@
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
+use Illuminate\Support\Facades\Log;
use Illuminate\Support\Str;
use Throwable;
use Visus\Cuid2\Cuid2;
@@ -31,6 +32,16 @@ class DatabaseBackupJob implements ShouldBeEncrypted, ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
+ /**
+ * The number of times the job may be attempted.
+ */
+ public $tries = 2;
+
+ /**
+ * The maximum number of unhandled exceptions to allow before failing.
+ *
+ * NOTE(review): with a limit of 1 the first unhandled exception appears to fail the job
+ * permanently, so the second attempt would only be used after a timeout or manual
+ * release - confirm this is the intended retry behaviour.
+ */
+ public $maxExceptions = 1;
+
public ?Team $team = null;
public Server $server;
@@ -74,7 +85,7 @@ class DatabaseBackupJob implements ShouldBeEncrypted, ShouldQueue
public function __construct(public ScheduledDatabaseBackup $backup)
{
$this->onQueue('high');
- $this->timeout = $backup->timeout;
+ $this->timeout = $backup->timeout ?? 3600;
}
public function handle(): void
@@ -659,17 +670,42 @@ private function getFullImageName(): string
return "{$helperImage}:{$latestVersion}";
}
+    /**
+     * Delays, in seconds, to wait before each successive retry of the backup job.
+     *
+     * @return array<int, int>
+     */
+    public function backoff(): array
+    {
+        return [
+            60,  // 1 minute
+            300, // 5 minutes
+        ];
+    }
+
+ /**
+ * Handle a job failure.
+ *
+ * Logs the failure to the scheduled-errors channel, marks the execution row identified by
+ * backup_log_uuid as failed (when one exists) and notifies the team when it is set.
+ *
+ * @param Throwable|null $exception Exception that caused the failure, if any
+ */
public function failed(?Throwable $exception): void
{
+ Log::channel('scheduled-errors')->error('DatabaseBackup permanently failed', [
+ 'job' => 'DatabaseBackupJob',
+ 'backup_id' => $this->backup->uuid,
+ 'database' => $this->database?->name ?? 'unknown',
+ 'database_type' => get_class($this->database ?? new \stdClass),
+ 'server' => $this->server?->name ?? 'unknown',
+ 'total_attempts' => $this->attempts(),
+ 'error' => $exception?->getMessage(),
+ 'trace' => $exception?->getTraceAsString(),
+ ]);
+
$log = ScheduledDatabaseBackupExecution::where('uuid', $this->backup_log_uuid)->first();
if ($log) {
$log->update([
'status' => 'failed',
- 'message' => 'Job failed: '.($exception?->getMessage() ?? 'Unknown error'),
+ 'message' => 'Job permanently failed after '.$this->attempts().' attempts: '.($exception?->getMessage() ?? 'Unknown error'),
'size' => 0,
'filename' => null,
+ 'finished_at' => Carbon::now(),
]);
}
+
+ // Notify team about permanent failure
+ // NOTE(review): $this->database is treated as possibly null in the log context above but is
+ // passed to BackupFailed unguarded - confirm it cannot be null whenever $this->team is set.
+ if ($this->team) {
+ $this->team->notify(new BackupFailed($this->backup, $this->database, $this->backup_output));
+ }
}
}
diff --git a/app/Jobs/ScheduledJobManager.php b/app/Jobs/ScheduledJobManager.php
index 9937444b8..75ff883c2 100644
--- a/app/Jobs/ScheduledJobManager.php
+++ b/app/Jobs/ScheduledJobManager.php
@@ -52,7 +52,7 @@ public function middleware(): array
{
return [
(new WithoutOverlapping('scheduled-job-manager'))
- ->expireAfter(60) // Lock expires after 1 minute to prevent stale locks
+ ->expireAfter(90) // Lock expires after 90s to handle high-load environments with many tasks
->dontRelease(), // Don't re-queue on lock conflict
];
}
diff --git a/app/Jobs/ScheduledTaskJob.php b/app/Jobs/ScheduledTaskJob.php
index 609595356..1776d0d78 100644
--- a/app/Jobs/ScheduledTaskJob.php
+++ b/app/Jobs/ScheduledTaskJob.php
@@ -18,11 +18,27 @@
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
+use Illuminate\Support\Facades\Log;
class ScheduledTaskJob implements ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
+ /**
+ * The number of times the job may be attempted.
+ */
+ public $tries = 3;
+
+ /**
+ * The maximum number of unhandled exceptions to allow before failing.
+ *
+ * NOTE(review): with a limit of 1 the first unhandled exception appears to fail the job
+ * permanently, so the remaining attempts would only be used after timeouts or manual
+ * releases - confirm this is the intended retry behaviour.
+ */
+ public $maxExceptions = 1;
+
+ /**
+ * The number of seconds the job can run before timing out.
+ *
+ * This is only the class default; the constructor replaces it with the task's own timeout.
+ */
+ public $timeout = 300;
+
public Team $team;
public Server $server;
@@ -33,6 +49,11 @@ class ScheduledTaskJob implements ShouldQueue
public ?ScheduledTaskExecution $task_log = null;
+ /**
+ * ID of the execution row created by handle().
+ *
+ * It is assigned while the job is running, so it stays null on an instance that never ran
+ * handle(); failed() falls back to querying the latest execution in that case.
+ */
+ protected ?int $executionId = null;
+
public string $task_status = 'failed';
public ?string $task_output = null;
@@ -55,6 +76,9 @@ public function __construct($task)
}
$this->team = Team::findOrFail($task->team_id);
$this->server_timezone = $this->getServerTimezone();
+
+ // Set timeout from task configuration
+ $this->timeout = $this->task->timeout ?? 300;
}
private function getServerTimezone(): string
@@ -70,11 +94,18 @@ private function getServerTimezone(): string
public function handle(): void
{
+ $startTime = Carbon::now();
+
try {
$this->task_log = ScheduledTaskExecution::create([
'scheduled_task_id' => $this->task->id,
+ 'started_at' => $startTime,
+ 'retry_count' => $this->attempts() - 1,
]);
+ // Store execution ID for timeout handling
+ $this->executionId = $this->task_log->id;
+
$this->server = $this->resource->destination->server;
if ($this->resource->type() === 'application') {
@@ -129,15 +160,101 @@ public function handle(): void
'message' => $this->task_output ?? $e->getMessage(),
]);
}
- $this->team?->notify(new TaskFailed($this->task, $e->getMessage()));
+
+ // Log the error to the scheduled-errors channel
+ Log::channel('scheduled-errors')->error('ScheduledTask execution failed', [
+ 'job' => 'ScheduledTaskJob',
+ 'task_id' => $this->task->uuid,
+ 'task_name' => $this->task->name,
+ 'server' => $this->server->name ?? 'unknown',
+ 'attempt' => $this->attempts(),
+ 'error' => $e->getMessage(),
+ ]);
+
+ // Team notification is sent from failed(), once the job has permanently failed
+
+ // Re-throw to trigger Laravel's retry mechanism with backoff
throw $e;
} finally {
ScheduledTaskDone::dispatch($this->team->id);
if ($this->task_log) {
+ $finishedAt = Carbon::now();
+ $duration = round($startTime->floatDiffInSeconds($finishedAt), 2);
+
$this->task_log->update([
- 'finished_at' => Carbon::now()->toImmutable(),
+ 'finished_at' => $finishedAt->toImmutable(),
+ 'duration' => $duration,
]);
}
}
}
+
+    /**
+     * Delays, in seconds, to wait before each successive retry of the scheduled task.
+     *
+     * @return array<int, int>
+     */
+    public function backoff(): array
+    {
+        return [
+            30,
+            60,
+            120,
+        ];
+    }
+
+    /**
+     * Handle a permanent job failure.
+     *
+     * Logs the failure, marks the matching execution row as failed (or logs a
+     * warning when none can be found) and notifies the team.
+     *
+     * @param  \Throwable|null  $exception  Exception that caused the failure, if any
+     */
+    public function failed(?\Throwable $exception): void
+    {
+        $attempts = $this->attempts();
+        $reason = $exception?->getMessage();
+        $trace = $exception?->getTraceAsString();
+
+        Log::channel('scheduled-errors')->error('ScheduledTask permanently failed', [
+            'job' => 'ScheduledTaskJob',
+            'task_id' => $this->task->uuid,
+            'task_name' => $this->task->name,
+            'server' => $this->server->name ?? 'unknown',
+            'total_attempts' => $attempts,
+            'error' => $reason,
+            'trace' => $trace,
+        ]);
+
+        // Resolve the execution row in order of reliability:
+        // 1. the ID stored by handle() (null when this instance never ran handle(), e.g. after a timeout)
+        $execution = $this->executionId
+            ? ScheduledTaskExecution::find($this->executionId)
+            : null;
+
+        // 2. the most recent execution recorded for this task
+        $execution ??= ScheduledTaskExecution::query()
+            ->where('scheduled_task_id', $this->task->id)
+            ->orderBy('created_at', 'desc')
+            ->first();
+
+        // 3. whatever model is still attached to this job instance
+        $execution ??= $this->task_log;
+
+        if (! $execution) {
+            Log::channel('scheduled-errors')->warning('Could not find execution log to update', [
+                'execution_id' => $this->executionId,
+                'task_id' => $this->task->uuid,
+            ]);
+        } else {
+            $errorMessage = 'Job permanently failed after '.$attempts.' attempts'
+                .($exception !== null ? ': '.$reason : '');
+
+            $execution->update([
+                'status' => 'failed',
+                'message' => $errorMessage,
+                'error_details' => $trace,
+                'finished_at' => Carbon::now()->toImmutable(),
+            ]);
+        }
+
+        // Notify team about permanent failure
+        $this->team?->notify(new TaskFailed($this->task, $reason ?? 'Unknown error'));
+    }
}
diff --git a/app/Livewire/Project/Database/BackupEdit.php b/app/Livewire/Project/Database/BackupEdit.php
index 7deaa82a9..da543a049 100644
--- a/app/Livewire/Project/Database/BackupEdit.php
+++ b/app/Livewire/Project/Database/BackupEdit.php
@@ -79,7 +79,7 @@ class BackupEdit extends Component
#[Validate(['required', 'boolean'])]
public bool $dumpAll = false;
- #[Validate(['required', 'int', 'min:1', 'max:36000'])]
+ #[Validate(['required', 'int', 'min:60', 'max:36000'])]
public int $timeout = 3600;
public function mount()
diff --git a/app/Livewire/Project/Shared/ScheduledTask/Add.php b/app/Livewire/Project/Shared/ScheduledTask/Add.php
index e4b666532..d7210c15d 100644
--- a/app/Livewire/Project/Shared/ScheduledTask/Add.php
+++ b/app/Livewire/Project/Shared/ScheduledTask/Add.php
@@ -34,11 +34,14 @@ class Add extends Component
public ?string $container = '';
+ public int $timeout = 300;
+
protected $rules = [
'name' => 'required|string',
'command' => 'required|string',
'frequency' => 'required|string',
'container' => 'nullable|string',
+ 'timeout' => 'required|integer|min:60|max:3600',
];
protected $validationAttributes = [
@@ -46,6 +49,7 @@ class Add extends Component
'command' => 'command',
'frequency' => 'frequency',
'container' => 'container',
+ 'timeout' => 'timeout',
];
public function mount()
@@ -103,6 +107,7 @@ public function saveScheduledTask()
$task->command = $this->command;
$task->frequency = $this->frequency;
$task->container = $this->container;
+ $task->timeout = $this->timeout;
$task->team_id = currentTeam()->id;
switch ($this->type) {
@@ -130,5 +135,6 @@ public function clear()
$this->command = '';
$this->frequency = '';
$this->container = '';
+ $this->timeout = 300;
}
}
diff --git a/app/Livewire/Project/Shared/ScheduledTask/Show.php b/app/Livewire/Project/Shared/ScheduledTask/Show.php
index c8d07ae36..920a0efe5 100644
--- a/app/Livewire/Project/Shared/ScheduledTask/Show.php
+++ b/app/Livewire/Project/Shared/ScheduledTask/Show.php
@@ -40,6 +40,9 @@ class Show extends Component
#[Validate(['string', 'nullable'])]
public ?string $container = null;
+ #[Validate(['integer', 'required', 'min:60', 'max:3600'])]
+ public int $timeout = 300;
+
#[Locked]
public ?string $application_uuid;
@@ -99,6 +102,7 @@ public function syncData(bool $toModel = false)
$this->task->command = str($this->command)->trim()->value();
$this->task->frequency = str($this->frequency)->trim()->value();
$this->task->container = str($this->container)->trim()->value();
+ $this->task->timeout = $this->timeout;
$this->task->save();
} else {
$this->isEnabled = $this->task->enabled;
@@ -106,6 +110,7 @@ public function syncData(bool $toModel = false)
$this->command = $this->task->command;
$this->frequency = $this->task->frequency;
$this->container = $this->task->container;
+ $this->timeout = $this->task->timeout ?? 300;
}
}
diff --git a/app/Models/ScheduledTask.php b/app/Models/ScheduledTask.php
index 06903ffb6..bada0b7a5 100644
--- a/app/Models/ScheduledTask.php
+++ b/app/Models/ScheduledTask.php
@@ -12,6 +12,14 @@ class ScheduledTask extends BaseModel
protected $guarded = [];
+ /**
+ * Attribute casts: `enabled` is cast to boolean and `timeout` to integer.
+ *
+ * @return array<string, string>
+ */
+ protected function casts(): array
+ {
+ return [
+ 'enabled' => 'boolean',
+ 'timeout' => 'integer',
+ ];
+ }
+
public function service()
{
return $this->belongsTo(Service::class);
diff --git a/app/Models/ScheduledTaskExecution.php b/app/Models/ScheduledTaskExecution.php
index de13fefb0..02fd6917a 100644
--- a/app/Models/ScheduledTaskExecution.php
+++ b/app/Models/ScheduledTaskExecution.php
@@ -8,6 +8,16 @@ class ScheduledTaskExecution extends BaseModel
{
protected $guarded = [];
+ /**
+ * Attribute casts: start/finish timestamps as datetimes, `retry_count` as integer and
+ * `duration` as a decimal with two places.
+ *
+ * @return array<string, string>
+ */
+ protected function casts(): array
+ {
+ return [
+ 'started_at' => 'datetime',
+ 'finished_at' => 'datetime',
+ 'retry_count' => 'integer',
+ 'duration' => 'decimal:2',
+ ];
+ }
+
public function scheduledTask(): BelongsTo
{
return $this->belongsTo(ScheduledTask::class);
diff --git a/config/logging.php b/config/logging.php
index 488327414..1a75978f3 100644
--- a/config/logging.php
+++ b/config/logging.php
@@ -129,8 +129,8 @@
'scheduled-errors' => [
'driver' => 'daily',
'path' => storage_path('logs/scheduled-errors.log'),
- 'level' => 'debug',
- 'days' => 7,
+ 'level' => 'warning',
+ 'days' => 14,
],
],
diff --git a/database/migrations/2025_11_09_000001_add_timeout_to_scheduled_tasks_table.php b/database/migrations/2025_11_09_000001_add_timeout_to_scheduled_tasks_table.php
new file mode 100644
index 000000000..067861e16
--- /dev/null
+++ b/database/migrations/2025_11_09_000001_add_timeout_to_scheduled_tasks_table.php
@@ -0,0 +1,28 @@
+integer('timeout')->default(300)->after('frequency');
+ });
+ }
+
+ /**
+ * Reverse the migrations.
+ *
+ * Drops the `timeout` column from the `scheduled_tasks` table.
+ */
+ public function down(): void
+ {
+ Schema::table('scheduled_tasks', function (Blueprint $table) {
+ $table->dropColumn('timeout');
+ });
+ }
+};
diff --git a/database/migrations/2025_11_09_000002_improve_scheduled_task_executions_tracking.php b/database/migrations/2025_11_09_000002_improve_scheduled_task_executions_tracking.php
new file mode 100644
index 000000000..14fdd5998
--- /dev/null
+++ b/database/migrations/2025_11_09_000002_improve_scheduled_task_executions_tracking.php
@@ -0,0 +1,31 @@
+timestamp('started_at')->nullable()->after('scheduled_task_id');
+ $table->integer('retry_count')->default(0)->after('status');
+ $table->decimal('duration', 10, 2)->nullable()->after('retry_count')->comment('Duration in seconds');
+ $table->text('error_details')->nullable()->after('message');
+ });
+ }
+
+ /**
+ * Reverse the migrations.
+ *
+ * Drops the `started_at`, `retry_count`, `duration` and `error_details` columns from the
+ * `scheduled_task_executions` table.
+ */
+ public function down(): void
+ {
+ Schema::table('scheduled_task_executions', function (Blueprint $table) {
+ $table->dropColumn(['started_at', 'retry_count', 'duration', 'error_details']);
+ });
+ }
+};
diff --git a/resources/views/livewire/project/shared/scheduled-task/add.blade.php b/resources/views/livewire/project/shared/scheduled-task/add.blade.php
index 0c4b8a4d6..6fa04c28b 100644
--- a/resources/views/livewire/project/shared/scheduled-task/add.blade.php
+++ b/resources/views/livewire/project/shared/scheduled-task/add.blade.php
@@ -4,6 +4,9 @@
+
@if ($type === 'application')
@if ($containerNames->count() > 1)
diff --git a/resources/views/livewire/project/shared/scheduled-task/show.blade.php b/resources/views/livewire/project/shared/scheduled-task/show.blade.php
index 1ede7775a..fa2ce0ad9 100644
--- a/resources/views/livewire/project/shared/scheduled-task/show.blade.php
+++ b/resources/views/livewire/project/shared/scheduled-task/show.blade.php
@@ -35,6 +35,8 @@
+
@if ($type === 'application')
first();
+
+ if (! $server) {
+ $this->markTestSkipped('No servers available for testing');
+ }
+
+ Queue::fake();
+
+ // Create an activity for the task
+ $activity = activity()
+ ->withProperties([
+ 'server_uuid' => $server->uuid,
+ 'command' => 'echo "test"',
+ 'type' => 'inline',
+ ])
+ ->event('inline')
+ ->log('[]');
+
+ // Dispatch the job
+ CoolifyTask::dispatch(
+ activity: $activity,
+ ignore_errors: false,
+ call_event_on_finish: null,
+ call_event_data: null
+ );
+
+ // Assert job was dispatched
+ Queue::assertPushed(CoolifyTask::class);
+});
+
+// Builds a CoolifyTask for an existing server and checks its tries, maxExceptions, timeout and backoff values.
+// The test is skipped when the database contains no server other than 1.2.3.4.
+it('has correct retry configuration on CoolifyTask', function () {
+ $server = Server::where('ip', '!=', '1.2.3.4')->first();
+
+ if (! $server) {
+ $this->markTestSkipped('No servers available for testing');
+ }
+
+ $activity = activity()
+ ->withProperties([
+ 'server_uuid' => $server->uuid,
+ 'command' => 'echo "test"',
+ 'type' => 'inline',
+ ])
+ ->event('inline')
+ ->log('[]');
+
+ $job = new CoolifyTask(
+ activity: $activity,
+ ignore_errors: false,
+ call_event_on_finish: null,
+ call_event_data: null
+ );
+
+ // Assert retry configuration
+ expect($job->tries)->toBe(3);
+ expect($job->maxExceptions)->toBe(1);
+ expect($job->timeout)->toBe(600);
+ expect($job->backoff())->toBe([30, 90, 180]);
+});
diff --git a/tests/Feature/StartupExecutionCleanupTest.php b/tests/Feature/StartupExecutionCleanupTest.php
new file mode 100644
index 000000000..3a6b00208
--- /dev/null
+++ b/tests/Feature/StartupExecutionCleanupTest.php
@@ -0,0 +1,216 @@
+create();
+
+ // Create a scheduled task
+ $scheduledTask = ScheduledTask::factory()->create([
+ 'team_id' => $team->id,
+ ]);
+
+ // Create multiple task executions with 'running' status
+ $runningExecution1 = ScheduledTaskExecution::create([
+ 'scheduled_task_id' => $scheduledTask->id,
+ 'status' => 'running',
+ 'started_at' => Carbon::now()->subMinutes(10),
+ ]);
+
+ $runningExecution2 = ScheduledTaskExecution::create([
+ 'scheduled_task_id' => $scheduledTask->id,
+ 'status' => 'running',
+ 'started_at' => Carbon::now()->subMinutes(5),
+ ]);
+
+ // Create a completed execution (should not be affected)
+ $completedExecution = ScheduledTaskExecution::create([
+ 'scheduled_task_id' => $scheduledTask->id,
+ 'status' => 'success',
+ 'started_at' => Carbon::now()->subMinutes(15),
+ 'finished_at' => Carbon::now()->subMinutes(14),
+ ]);
+
+ // Run the app:init command
+ Artisan::call('app:init');
+
+ // Refresh models from database
+ $runningExecution1->refresh();
+ $runningExecution2->refresh();
+ $completedExecution->refresh();
+
+ // Assert running executions are now failed
+ expect($runningExecution1->status)->toBe('failed')
+ ->and($runningExecution1->message)->toBe('Marked as failed during Coolify startup - job was interrupted')
+ ->and($runningExecution1->finished_at)->not->toBeNull()
+ ->and($runningExecution1->finished_at->toDateTimeString())->toBe('2025-01-15 12:00:00');
+
+ expect($runningExecution2->status)->toBe('failed')
+ ->and($runningExecution2->message)->toBe('Marked as failed during Coolify startup - job was interrupted')
+ ->and($runningExecution2->finished_at)->not->toBeNull();
+
+ // Assert completed execution is unchanged
+ expect($completedExecution->status)->toBe('success')
+ ->and($completedExecution->message)->toBeNull();
+
+ // Assert NO notifications were sent
+ Notification::assertNothingSent();
+});
+
+// Running app:init must flip every 'running' backup execution to 'failed' and leave finished ones untouched.
+// NOTE(review): the exact finished_at assertion and assertNothingSent() presumably rely on a frozen clock
+// (2025-01-15 12:00:00) and Notification::fake() set up outside this test - confirm in the file's setup.
+test('app:init marks stuck database backup executions as failed', function () {
+ // Create a team for the scheduled backup
+ $team = Team::factory()->create();
+
+ // Create a database
+ $database = StandalonePostgresql::factory()->create([
+ 'team_id' => $team->id,
+ ]);
+
+ // Create a scheduled backup
+ $scheduledBackup = ScheduledDatabaseBackup::factory()->create([
+ 'team_id' => $team->id,
+ 'database_id' => $database->id,
+ 'database_type' => StandalonePostgresql::class,
+ ]);
+
+ // Create multiple backup executions with 'running' status
+ $runningBackup1 = ScheduledDatabaseBackupExecution::create([
+ 'scheduled_database_backup_id' => $scheduledBackup->id,
+ 'status' => 'running',
+ 'database_name' => 'test_db',
+ ]);
+
+ $runningBackup2 = ScheduledDatabaseBackupExecution::create([
+ 'scheduled_database_backup_id' => $scheduledBackup->id,
+ 'status' => 'running',
+ 'database_name' => 'test_db_2',
+ ]);
+
+ // Create a successful backup (should not be affected)
+ $successfulBackup = ScheduledDatabaseBackupExecution::create([
+ 'scheduled_database_backup_id' => $scheduledBackup->id,
+ 'status' => 'success',
+ 'database_name' => 'test_db_3',
+ 'finished_at' => Carbon::now()->subMinutes(20),
+ ]);
+
+ // Run the app:init command
+ Artisan::call('app:init');
+
+ // Refresh models from database
+ $runningBackup1->refresh();
+ $runningBackup2->refresh();
+ $successfulBackup->refresh();
+
+ // Assert running backups are now failed
+ expect($runningBackup1->status)->toBe('failed')
+ ->and($runningBackup1->message)->toBe('Marked as failed during Coolify startup - job was interrupted')
+ ->and($runningBackup1->finished_at)->not->toBeNull()
+ ->and($runningBackup1->finished_at->toDateTimeString())->toBe('2025-01-15 12:00:00');
+
+ expect($runningBackup2->status)->toBe('failed')
+ ->and($runningBackup2->message)->toBe('Marked as failed during Coolify startup - job was interrupted')
+ ->and($runningBackup2->finished_at)->not->toBeNull();
+
+ // Assert successful backup is unchanged
+ expect($successfulBackup->status)->toBe('success')
+ ->and($successfulBackup->message)->toBeNull();
+
+ // Assert NO notifications were sent
+ Notification::assertNothingSent();
+});
+
+// With only finished executions present, app:init must exit with code 0 and change no execution status.
+// NOTE(review): assertNothingSent() presumably relies on Notification::fake() being called in setup
+// outside this test - confirm.
+test('app:init handles cleanup when no stuck executions exist', function () {
+ // Create a team
+ $team = Team::factory()->create();
+
+ // Create a scheduled task
+ $scheduledTask = ScheduledTask::factory()->create([
+ 'team_id' => $team->id,
+ ]);
+
+ // Create only completed executions
+ ScheduledTaskExecution::create([
+ 'scheduled_task_id' => $scheduledTask->id,
+ 'status' => 'success',
+ 'started_at' => Carbon::now()->subMinutes(10),
+ 'finished_at' => Carbon::now()->subMinutes(9),
+ ]);
+
+ ScheduledTaskExecution::create([
+ 'scheduled_task_id' => $scheduledTask->id,
+ 'status' => 'failed',
+ 'started_at' => Carbon::now()->subMinutes(20),
+ 'finished_at' => Carbon::now()->subMinutes(19),
+ ]);
+
+ // Run the app:init command (should not fail)
+ $exitCode = Artisan::call('app:init');
+
+ // Assert command succeeded
+ expect($exitCode)->toBe(0);
+
+ // Assert all executions remain unchanged
+ expect(ScheduledTaskExecution::where('status', 'running')->count())->toBe(0)
+ ->and(ScheduledTaskExecution::where('status', 'success')->count())->toBe(1)
+ ->and(ScheduledTaskExecution::where('status', 'failed')->count())->toBe(1);
+
+ // Assert NO notifications were sent
+ Notification::assertNothingSent();
+});
+
+// Startup cleanup must mark the running execution as failed without notifying a team that has SMTP enabled.
+// NOTE(review): assertNothingSent() presumably relies on Notification::fake() being called in setup
+// outside this test - confirm.
+test('cleanup does not send notifications even when team has notification settings', function () {
+ // Create a team with notification settings enabled
+ $team = Team::factory()->create([
+ 'smtp_enabled' => true,
+ 'smtp_from_address' => 'test@example.com',
+ ]);
+
+ // Create a scheduled task
+ $scheduledTask = ScheduledTask::factory()->create([
+ 'team_id' => $team->id,
+ ]);
+
+ // Create a running execution
+ $runningExecution = ScheduledTaskExecution::create([
+ 'scheduled_task_id' => $scheduledTask->id,
+ 'status' => 'running',
+ 'started_at' => Carbon::now()->subMinutes(5),
+ ]);
+
+ // Run the app:init command
+ Artisan::call('app:init');
+
+ // Refresh model
+ $runningExecution->refresh();
+
+ // Assert execution is failed
+ expect($runningExecution->status)->toBe('failed');
+
+ // Assert NO notifications were sent despite team having notification settings
+ Notification::assertNothingSent();
+});
diff --git a/tests/Unit/ScheduledJobsRetryConfigTest.php b/tests/Unit/ScheduledJobsRetryConfigTest.php
new file mode 100644
index 000000000..f46cb9fd1
--- /dev/null
+++ b/tests/Unit/ScheduledJobsRetryConfigTest.php
@@ -0,0 +1,77 @@
+hasProperty('tries'))->toBeTrue()
+ ->and($reflection->hasProperty('maxExceptions'))->toBeTrue()
+ ->and($reflection->hasProperty('timeout'))->toBeTrue()
+ ->and($reflection->hasMethod('backoff'))->toBeTrue();
+
+ // Get default values from class definition
+ $defaultProperties = $reflection->getDefaultProperties();
+
+ expect($defaultProperties['tries'])->toBe(3)
+ ->and($defaultProperties['maxExceptions'])->toBe(1)
+ ->and($defaultProperties['timeout'])->toBe(600);
+});
+
+it('ScheduledTaskJob has correct retry properties defined', function () {
+ $reflection = new ReflectionClass(ScheduledTaskJob::class);
+
+ // Pin the retry surface: the three properties and backoff()/failed() must exist (visibility is not checked here)
+ expect($reflection->hasProperty('tries'))->toBeTrue()
+ ->and($reflection->hasProperty('maxExceptions'))->toBeTrue()
+ ->and($reflection->hasProperty('timeout'))->toBeTrue()
+ ->and($reflection->hasMethod('backoff'))->toBeTrue()
+ ->and($reflection->hasMethod('failed'))->toBeTrue();
+
+ // Read the defaults declared on the class via reflection; the job itself is never instantiated
+ $defaultProperties = $reflection->getDefaultProperties();
+
+ expect($defaultProperties['tries'])->toBe(3)
+ ->and($defaultProperties['maxExceptions'])->toBe(1)
+ ->and($defaultProperties['timeout'])->toBe(300);
+});
+
+it('DatabaseBackupJob has correct retry properties defined', function () {
+ $reflection = new ReflectionClass(DatabaseBackupJob::class);
+
+ // Pin the retry surface: the three properties and backoff()/failed() must exist (visibility is not checked here)
+ expect($reflection->hasProperty('tries'))->toBeTrue()
+ ->and($reflection->hasProperty('maxExceptions'))->toBeTrue()
+ ->and($reflection->hasProperty('timeout'))->toBeTrue()
+ ->and($reflection->hasMethod('backoff'))->toBeTrue()
+ ->and($reflection->hasMethod('failed'))->toBeTrue();
+
+ // Read the defaults declared on the class via reflection; the job itself is never instantiated
+ $defaultProperties = $reflection->getDefaultProperties();
+
+ expect($defaultProperties['tries'])->toBe(2)
+ ->and($defaultProperties['maxExceptions'])->toBe(1)
+ ->and($defaultProperties['timeout'])->toBe(3600);
+});
+
+it('DatabaseBackupJob enforces minimum timeout of 60 seconds', function () {
+ // Inspect the constructor's source text (the constructor is never executed) to verify minimum timeout enforcement
+ $reflection = new ReflectionClass(DatabaseBackupJob::class);
+ $constructor = $reflection->getMethod('__construct');
+
+ // Locate the constructor in its file; reflection line numbers are 1-based, hence the "- 1" in the slice below
+ $filename = $reflection->getFileName();
+ $startLine = $constructor->getStartLine();
+ $endLine = $constructor->getEndLine();
+
+ $source = file($filename);
+ $constructorSource = implode('', array_slice($source, $startLine - 1, $endLine - $startLine + 1));
+
+ // Substring check only: source must contain 'max(' and '60'. NOTE(review): '60' also matches e.g. '600' or '3600'
+ expect($constructorSource)
+ ->toContain('max(')
+ ->toContain('60');
+});
diff --git a/tests/Unit/ScheduledTaskJobTimeoutTest.php b/tests/Unit/ScheduledTaskJobTimeoutTest.php
new file mode 100644
index 000000000..99117fbca
--- /dev/null
+++ b/tests/Unit/ScheduledTaskJobTimeoutTest.php
@@ -0,0 +1,96 @@
+hasProperty('executionId'))->toBeTrue();
+
+ // Verify it's protected (will be serialized with the job)
+ $property = $reflection->getProperty('executionId');
+ expect($property->isProtected())->toBeTrue();
+});
+
+it('has failed method that handles job failures', function () {
+ $reflection = new ReflectionClass(ScheduledTaskJob::class);
+
+ // Verify ScheduledTaskJob declares a failed() method (checked by reflection; the method is never called)
+ expect($reflection->hasMethod('failed'))->toBeTrue();
+
+ // Verify it takes exactly one nullable parameter named 'exception' (the declared type itself is not checked here)
+ $method = $reflection->getMethod('failed');
+ $parameters = $method->getParameters();
+
+ expect($parameters)->toHaveCount(1);
+ expect($parameters[0]->getName())->toBe('exception');
+ expect($parameters[0]->allowsNull())->toBeTrue();
+});
+
+it('failed method implementation reloads execution from database', function () {
+ // Inspect the text of failed() (the method is never executed) to verify it reloads the execution from the database
+ $reflection = new ReflectionClass(ScheduledTaskJob::class);
+ $method = $reflection->getMethod('failed');
+
+ // Locate the method in its file; reflection line numbers are 1-based, hence the "- 1" in the slice below
+ $filename = $reflection->getFileName();
+ $startLine = $method->getStartLine();
+ $endLine = $method->getEndLine();
+
+ $source = file($filename);
+ $methodSource = implode('', array_slice($source, $startLine - 1, $endLine - $startLine + 1));
+
+ // Substring checks only. NOTE(review): 'failed' presumably always matches the method's own declaration line
+ expect($methodSource)
+ ->toContain('$this->executionId')
+ ->toContain('ScheduledTaskExecution::find')
+ ->toContain('ScheduledTaskExecution::query')
+ ->toContain('scheduled_task_id')
+ ->toContain('orderBy')
+ ->toContain('status')
+ ->toContain('failed')
+ ->toContain('notify');
+});
+
+it('failed method updates execution with error_details field', function () {
+ // Inspect the text of failed() (the method is never executed) to verify it mentions the error_details field
+ $reflection = new ReflectionClass(ScheduledTaskJob::class);
+ $method = $reflection->getMethod('failed');
+
+ // Locate the method in its file; reflection line numbers are 1-based, hence the "- 1" in the slice below
+ $filename = $reflection->getFileName();
+ $startLine = $method->getStartLine();
+ $endLine = $method->getEndLine();
+
+ $source = file($filename);
+ $methodSource = implode('', array_slice($source, $startLine - 1, $endLine - $startLine + 1));
+
+ // Substring check only: passes if 'error_details' appears anywhere in the method text, including in a comment
+ expect($methodSource)->toContain('error_details');
+});
+
+it('failed method logs when execution cannot be found', function () {
+ // Inspect the text of failed() (the method is never executed) to verify it contains defensive logging
+ $reflection = new ReflectionClass(ScheduledTaskJob::class);
+ $method = $reflection->getMethod('failed');
+
+ // Locate the method in its file; reflection line numbers are 1-based, hence the "- 1" in the slice below
+ $filename = $reflection->getFileName();
+ $startLine = $method->getStartLine();
+ $endLine = $method->getEndLine();
+
+ $source = file($filename);
+ $methodSource = implode('', array_slice($source, $startLine - 1, $endLine - $startLine + 1));
+
+ // Substring checks only: the message text and the word 'warning' must both appear somewhere in the method text
+ expect($methodSource)
+ ->toContain('Could not find execution log')
+ ->toContain('warning');
+});
diff --git a/tests/Unit/StartupExecutionCleanupTest.php b/tests/Unit/StartupExecutionCleanupTest.php
new file mode 100644
index 000000000..1fae590eb
--- /dev/null
+++ b/tests/Unit/StartupExecutionCleanupTest.php
@@ -0,0 +1,116 @@
+shouldReceive('where')
+ ->once()
+ ->with('status', 'running')
+ ->andReturnSelf();
+
+ // Expect update to be called with correct parameters
+ $mockBuilder->shouldReceive('update')
+ ->once()
+ ->with([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ])
+ ->andReturn(2); // Simulate 2 records updated
+
+ // Execute the cleanup logic directly
+ $updatedCount = ScheduledTaskExecution::where('status', 'running')->update([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ]);
+
+ // Assert the count is correct
+ expect($updatedCount)->toBe(2);
+});
+
+it('marks stuck database backup executions as failed without triggering notifications', function () {
+ // Replace ScheduledDatabaseBackupExecution with a Mockery alias mock so the static where() call below hits the mock
+ $mockBuilder = \Mockery::mock('alias:'.ScheduledDatabaseBackupExecution::class);
+
+ // Expect exactly one where('status', 'running'); andReturnSelf() lets update() chain on the same mock
+ $mockBuilder->shouldReceive('where')
+ ->once()
+ ->with('status', 'running')
+ ->andReturnSelf();
+
+ // Expect one update() with the failure payload. NOTE(review): matching finished_at presumably needs a frozen clock
+ $mockBuilder->shouldReceive('update')
+ ->once()
+ ->with([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ])
+ ->andReturn(3); // Simulate 3 records updated
+
+ // Issue the same query from the test itself. NOTE(review): no production code or notification path is invoked here
+ $updatedCount = ScheduledDatabaseBackupExecution::where('status', 'running')->update([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ]);
+
+ // Assert the row count configured on the mock is returned unchanged
+ expect($updatedCount)->toBe(3);
+});
+
+it('handles cleanup when no stuck executions exist', function () {
+ // Alias-mock ScheduledTaskExecution so where()/update() below hit the mock; update() args are not constrained here
+ $mockBuilder = \Mockery::mock('alias:'.ScheduledTaskExecution::class);
+
+ $mockBuilder->shouldReceive('where')
+ ->once()
+ ->with('status', 'running')
+ ->andReturnSelf();
+
+ $mockBuilder->shouldReceive('update')
+ ->once()
+ ->andReturn(0); // Simulate no records updated (nothing was in 'running' state)
+
+ $updatedCount = ScheduledTaskExecution::where('status', 'running')->update([
+ 'status' => 'failed',
+ 'message' => 'Marked as failed during Coolify startup - job was interrupted',
+ 'finished_at' => Carbon::now(),
+ ]);
+
+ expect($updatedCount)->toBe(0);
+});
+
+it('uses correct failure message for interrupted jobs', function () {
+ $expectedMessage = 'Marked as failed during Coolify startup - job was interrupted';
+
+ // Check the literal above contains the key phrases. NOTE(review): only this local string is inspected, no production code
+ expect($expectedMessage)
+ ->toContain('Coolify startup')
+ ->toContain('interrupted')
+ ->toContain('failed');
+});
+
+it('sets finished_at timestamp when marking executions as failed', function () {
+ $now = Carbon::now();
+
+ // Check Carbon::now() is a Carbon at 2025-01-15 12:00:00; presumably the clock is frozen in setup outside this view (confirm)
+ expect($now)->toBeInstanceOf(Carbon::class)
+ ->and($now->toDateTimeString())->toBe('2025-01-15 12:00:00');
+});