Enhance scheduled tasks with improved retry and timeout features (#7177)

This commit is contained in:
Andras Bacsai 2025-11-11 12:36:53 +01:00 committed by GitHub
commit 3def8ce5f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 1054 additions and 10 deletions

View file

@ -7,7 +7,7 @@
class CleanupRedis extends Command
{
protected $signature = 'cleanup:redis {--dry-run : Show what would be deleted without actually deleting} {--skip-overlapping : Skip overlapping queue cleanup} {--clear-locks : Clear stale WithoutOverlapping locks}';
protected $signature = 'cleanup:redis {--dry-run : Show what would be deleted without actually deleting} {--skip-overlapping : Skip overlapping queue cleanup} {--clear-locks : Clear stale WithoutOverlapping locks} {--restart : Aggressive cleanup mode for system restart (marks all processing jobs as failed)}';
protected $description = 'Cleanup Redis (Horizon jobs, metrics, overlapping queues, cache locks, and related data)';
@ -63,6 +63,14 @@ public function handle()
$deletedCount += $locksCleaned;
}
// Clean up stuck jobs (restart mode = aggressive, runtime mode = conservative)
$isRestart = $this->option('restart');
if ($isRestart || $this->option('clear-locks')) {
$this->info($isRestart ? 'Cleaning up stuck jobs (RESTART MODE - aggressive)...' : 'Checking for stuck jobs (runtime mode - conservative)...');
$jobsCleaned = $this->cleanupStuckJobs($redis, $prefix, $dryRun, $isRestart);
$deletedCount += $jobsCleaned;
}
if ($dryRun) {
$this->info("DRY RUN: Would delete {$deletedCount} out of {$totalKeys} keys");
} else {
@ -332,4 +340,98 @@ private function cleanupCacheLocks(bool $dryRun): int
return $cleanedCount;
}
/**
 * Mark stuck Horizon jobs as failed, aggressively on restart or conservatively at runtime.
 *
 * Every key returned by the connection is inspected; only hashes whose "status" field is
 * "processing" or "reserved" are considered. In restart mode all of them are marked as
 * failed. In runtime mode ApplicationDeploymentJob and DatabaseBackupJob are skipped and any
 * other job is failed only when a start timestamp is known and it is more than 12 hours old.
 *
 * @param mixed $redis Redis connection
 * @param string $prefix Horizon prefix
 * @param bool $dryRun Dry run mode (report only, nothing is written)
 * @param bool $isRestart Restart mode (aggressive) vs runtime mode (conservative)
 * @return int Number of jobs cleaned (or that would be cleaned in dry run mode)
 */
private function cleanupStuckJobs($redis, string $prefix, bool $dryRun, bool $isRestart): int
{
    $cleanedCount = 0;
    $now = time();
    $maxRuntimeSeconds = 43200; // 12 hours

    // Get all keys with the horizon prefix
    $keys = $redis->keys('*');

    foreach ($keys as $key) {
        $keyWithoutPrefix = str_replace($prefix, '', $key);
        $type = $redis->command('type', [$keyWithoutPrefix]);

        // Only process hash-type keys (individual jobs)
        // NOTE(review): 5 is presumably phpredis' \Redis::REDIS_HASH; other clients may
        // report the type differently - confirm against the configured Redis client.
        if ($type !== 5) {
            continue;
        }

        $data = $redis->command('hgetall', [$keyWithoutPrefix]);
        $status = data_get($data, 'status');
        $payload = data_get($data, 'payload');

        // Only process jobs in "processing" or "reserved" state
        if (! in_array($status, ['processing', 'reserved'], true)) {
            continue;
        }

        // Parse job payload to get the job class; a missing payload must not reach json_decode()
        $payloadData = is_string($payload) ? json_decode($payload, true) : null;
        $jobClass = data_get($payloadData, 'displayName', 'Unknown');
        if (! is_string($jobClass)) {
            $jobClass = 'Unknown';
        }

        // Resolve when the job started. A missing timestamp must not be treated as 0,
        // otherwise the job would look like it has been running since the Unix epoch.
        // NOTE(review): the field names below are assumptions about Horizon's job hash
        // and payload layout - confirm against the installed Horizon version.
        $startedAt = 0;
        $timestampCandidates = [
            data_get($data, 'reserved_at'),
            data_get($data, 'pushed_at'),
            data_get($payloadData, 'pushedAt'),
            data_get($data, 'created_at'),
        ];
        foreach ($timestampCandidates as $candidate) {
            if (is_numeric($candidate) && (int) $candidate > 0) {
                $startedAt = (int) $candidate;
                break;
            }
        }

        $hasStartTime = $startedAt > 0;
        $processingTime = $hasStartTime ? $now - $startedAt : 0;
        $durationLabel = $hasStartTime ? round($processingTime / 60, 1).' min' : 'unknown duration';

        $shouldFail = false;
        $reason = '';

        if ($isRestart) {
            // RESTART MODE: Mark ALL processing/reserved jobs as failed
            // Safe because all workers are dead on restart
            $shouldFail = true;
            $reason = 'System restart - all workers terminated';
        } else {
            // RUNTIME MODE: Only mark truly stuck jobs as failed
            // Be conservative to avoid killing legitimate long-running jobs

            // Skip ApplicationDeploymentJob entirely (has dynamic_timeout, can run 2+ hours)
            if (str_contains($jobClass, 'ApplicationDeploymentJob')) {
                continue;
            }

            // Skip DatabaseBackupJob (large backups can take hours)
            if (str_contains($jobClass, 'DatabaseBackupJob')) {
                continue;
            }

            // Without a reliable start time we cannot prove the job is stuck, so leave it alone
            if (! $hasStartTime) {
                continue;
            }

            // For other jobs, only fail if processing > 12 hours
            if ($processingTime > $maxRuntimeSeconds) {
                $shouldFail = true;
                $reason = 'Processing for more than 12 hours';
            }
        }

        if (! $shouldFail) {
            continue;
        }

        if ($dryRun) {
            $this->warn(" Would mark as FAILED: {$jobClass} (processing for {$durationLabel}) - {$reason}");
        } else {
            // Mark job as failed
            $redis->command('hset', [$keyWithoutPrefix, 'status', 'failed']);
            $redis->command('hset', [$keyWithoutPrefix, 'failed_at', $now]);
            $redis->command('hset', [$keyWithoutPrefix, 'exception', "Job cleaned up by cleanup:redis - {$reason}"]);
            $this->info(" ✓ Marked as FAILED: {$jobClass} (processing for {$durationLabel}) - {$reason}");
        }

        $cleanedCount++;
    }

    if ($cleanedCount === 0) {
        $this->info($isRestart ? ' No jobs to clean up' : ' No stuck jobs found (all jobs running normally)');
    }

    return $cleanedCount;
}
}

View file

@ -4,6 +4,9 @@
use App\Jobs\CheckHelperImageJob;
use App\Models\InstanceSettings;
use App\Models\ScheduledDatabaseBackupExecution;
use App\Models\ScheduledTaskExecution;
use Carbon\Carbon;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Artisan;
@ -45,6 +48,44 @@ public function init()
} else {
echo "Instance already initialized.\n";
}
// Clean up stuck jobs and stale locks on development startup
try {
echo "Cleaning up Redis (stuck jobs and stale locks)...\n";
Artisan::call('cleanup:redis', ['--restart' => true, '--clear-locks' => true]);
echo "Redis cleanup completed.\n";
} catch (\Throwable $e) {
echo "Error in cleanup:redis: {$e->getMessage()}\n";
}
try {
$updatedTaskCount = ScheduledTaskExecution::where('status', 'running')->update([
'status' => 'failed',
'message' => 'Marked as failed during Coolify startup - job was interrupted',
'finished_at' => Carbon::now(),
]);
if ($updatedTaskCount > 0) {
echo "Marked {$updatedTaskCount} stuck scheduled task executions as failed\n";
}
} catch (\Throwable $e) {
echo "Could not cleanup stuck scheduled task executions: {$e->getMessage()}\n";
}
try {
$updatedBackupCount = ScheduledDatabaseBackupExecution::where('status', 'running')->update([
'status' => 'failed',
'message' => 'Marked as failed during Coolify startup - job was interrupted',
'finished_at' => Carbon::now(),
]);
if ($updatedBackupCount > 0) {
echo "Marked {$updatedBackupCount} stuck database backup executions as failed\n";
}
} catch (\Throwable $e) {
echo "Could not cleanup stuck database backup executions: {$e->getMessage()}\n";
}
CheckHelperImageJob::dispatch();
}
}

View file

@ -10,9 +10,12 @@
use App\Models\Environment;
use App\Models\InstanceSettings;
use App\Models\ScheduledDatabaseBackup;
use App\Models\ScheduledDatabaseBackupExecution;
use App\Models\ScheduledTaskExecution;
use App\Models\Server;
use App\Models\StandalonePostgresql;
use App\Models\User;
use Carbon\Carbon;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Artisan;
use Illuminate\Support\Facades\File;
@ -73,7 +76,7 @@ public function handle()
$this->cleanupUnusedNetworkFromCoolifyProxy();
try {
$this->call('cleanup:redis', ['--clear-locks' => true]);
$this->call('cleanup:redis', ['--restart' => true, '--clear-locks' => true]);
} catch (\Throwable $e) {
echo "Error in cleanup:redis command: {$e->getMessage()}\n";
}
@ -103,6 +106,34 @@ public function handle()
echo "Could not cleanup inprogress deployments: {$e->getMessage()}\n";
}
try {
$updatedTaskCount = ScheduledTaskExecution::where('status', 'running')->update([
'status' => 'failed',
'message' => 'Marked as failed during Coolify startup - job was interrupted',
'finished_at' => Carbon::now(),
]);
if ($updatedTaskCount > 0) {
echo "Marked {$updatedTaskCount} stuck scheduled task executions as failed\n";
}
} catch (\Throwable $e) {
echo "Could not cleanup stuck scheduled task executions: {$e->getMessage()}\n";
}
try {
$updatedBackupCount = ScheduledDatabaseBackupExecution::where('status', 'running')->update([
'status' => 'failed',
'message' => 'Marked as failed during Coolify startup - job was interrupted',
'finished_at' => Carbon::now(),
]);
if ($updatedBackupCount > 0) {
echo "Marked {$updatedBackupCount} stuck database backup executions as failed\n";
}
} catch (\Throwable $e) {
echo "Could not cleanup stuck database backup executions: {$e->getMessage()}\n";
}
try {
$localhost = $this->servers->where('id', 0)->first();
if ($localhost) {

View file

@ -3,18 +3,35 @@
namespace App\Jobs;
use App\Actions\CoolifyTask\RunRemoteProcess;
use App\Enums\ProcessStatus;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldBeEncrypted;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
use Spatie\Activitylog\Models\Activity;
class CoolifyTask implements ShouldBeEncrypted, ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
/**
* The number of times the job may be attempted.
*/
public $tries = 3;
/**
* The maximum number of unhandled exceptions to allow before failing.
*/
public $maxExceptions = 1;
/**
* The number of seconds the job can run before timing out.
*/
public $timeout = 600;
/**
* Create a new job instance.
*/
@ -42,4 +59,36 @@ public function handle(): void
$remote_process();
}
/**
 * Delays, in seconds, applied between successive retries of this job.
 *
 * NOTE(review): the class declares $tries = 3 and $maxExceptions = 1, so presumably not
 * every entry can be used and an unhandled exception may fail the job before any retry -
 * confirm against Laravel's queue worker behaviour.
 *
 * @return array<int, int>
 */
public function backoff(): array
{
    $retryDelays = [30, 90, 180]; // 30s, 90s, 180s

    return $retryDelays;
}
/**
 * Record a permanent failure: write it to the scheduled-errors log and flag the
 * activity as errored.
 *
 * @param \Throwable|null $exception Exception that caused the failure, if any
 */
public function failed(?\Throwable $exception): void
{
    $activity = $this->activity;
    $reason = $exception?->getMessage();

    // Only the first 200 bytes of the command are logged
    $commandPreview = substr($activity->getExtraProperty('command') ?? '', 0, 200);

    Log::channel('scheduled-errors')->error('CoolifyTask permanently failed', [
        'job' => 'CoolifyTask',
        'activity_id' => $activity->id,
        'server_uuid' => $activity->getExtraProperty('server_uuid'),
        'command_preview' => $commandPreview,
        'error' => $reason,
        'total_attempts' => $this->attempts(),
        'trace' => $exception?->getTraceAsString(),
    ]);

    // Persist the failure on the activity itself
    $failureState = [
        'status' => ProcessStatus::ERROR->value,
        'error' => $reason ?? 'Job permanently failed',
        'failed_at' => now()->toIso8601String(),
    ];
    $activity->properties = $activity->properties->merge($failureState);
    $activity->save();
}
}

View file

@ -23,6 +23,7 @@
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Str;
use Throwable;
use Visus\Cuid2\Cuid2;
@ -31,6 +32,16 @@ class DatabaseBackupJob implements ShouldBeEncrypted, ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
/**
* The number of times the job may be attempted.
*/
public $tries = 2;
/**
* The maximum number of unhandled exceptions to allow before failing.
*/
public $maxExceptions = 1;
public ?Team $team = null;
public Server $server;
@ -74,7 +85,7 @@ class DatabaseBackupJob implements ShouldBeEncrypted, ShouldQueue
/**
 * Create the backup job on the "high" queue with a timeout taken from the backup settings.
 *
 * A missing timeout falls back to 3600 seconds, and any value below 60 seconds is raised
 * to 60, in line with the min:60 rule applied when a backup is edited.
 */
public function __construct(public ScheduledDatabaseBackup $backup)
{
    $this->onQueue('high');
    $this->timeout = max(60, (int) ($backup->timeout ?? 3600));
}
public function handle(): void
@ -659,17 +670,42 @@ private function getFullImageName(): string
return "{$helperImage}:{$latestVersion}";
}
/**
 * Delays, in seconds, applied between successive retries of this job.
 *
 * NOTE(review): the class declares $tries = 2 and $maxExceptions = 1, so presumably only
 * the first delay can ever apply, and only to retries not caused by an unhandled
 * exception - confirm against Laravel's queue worker behaviour.
 *
 * @return array<int, int>
 */
public function backoff(): array
{
    $retryDelays = [60, 300]; // 1min, 5min

    return $retryDelays;
}
/**
 * Handle a permanent job failure: log it, mark the backup execution as failed and
 * notify the team.
 *
 * @param Throwable|null $exception Exception that caused the failure, if any
 */
public function failed(?Throwable $exception): void
{
    Log::channel('scheduled-errors')->error('DatabaseBackup permanently failed', [
        'job' => 'DatabaseBackupJob',
        'backup_id' => $this->backup->uuid,
        'database' => $this->database?->name ?? 'unknown',
        'database_type' => get_class($this->database ?? new \stdClass),
        'server' => $this->server?->name ?? 'unknown',
        'total_attempts' => $this->attempts(),
        'error' => $exception?->getMessage(),
        'trace' => $exception?->getTraceAsString(),
    ]);

    $log = ScheduledDatabaseBackupExecution::where('uuid', $this->backup_log_uuid)->first();
    if ($log) {
        $log->update([
            'status' => 'failed',
            'message' => 'Job permanently failed after '.$this->attempts().' attempts: '.($exception?->getMessage() ?? 'Unknown error'),
            'size' => 0,
            'filename' => null,
            'finished_at' => Carbon::now(),
        ]);
    }

    // Notify team about permanent failure
    if ($this->team) {
        $this->team->notify(new BackupFailed($this->backup, $this->database, $this->backup_output));
    }
}
}

View file

@ -52,7 +52,7 @@ public function middleware(): array
{
return [
(new WithoutOverlapping('scheduled-job-manager'))
->expireAfter(60) // Lock expires after 1 minute to prevent stale locks
->expireAfter(90) // Lock expires after 90s to handle high-load environments with many tasks
->dontRelease(), // Don't re-queue on lock conflict
];
}

View file

@ -18,11 +18,27 @@
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
class ScheduledTaskJob implements ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
/**
* The number of times the job may be attempted.
*/
public $tries = 3;
/**
* The maximum number of unhandled exceptions to allow before failing.
*/
public $maxExceptions = 1;
/**
* The number of seconds the job can run before timing out.
*/
public $timeout = 300;
public Team $team;
public Server $server;
@ -33,6 +49,11 @@ class ScheduledTaskJob implements ShouldQueue
public ?ScheduledTaskExecution $task_log = null;
/**
* Store execution ID to survive job serialization for timeout handling.
*/
protected ?int $executionId = null;
public string $task_status = 'failed';
public ?string $task_output = null;
@ -55,6 +76,9 @@ public function __construct($task)
}
$this->team = Team::findOrFail($task->team_id);
$this->server_timezone = $this->getServerTimezone();
// Set timeout from task configuration
$this->timeout = $this->task->timeout ?? 300;
}
private function getServerTimezone(): string
@ -70,11 +94,18 @@ private function getServerTimezone(): string
public function handle(): void
{
$startTime = Carbon::now();
try {
$this->task_log = ScheduledTaskExecution::create([
'scheduled_task_id' => $this->task->id,
'started_at' => $startTime,
'retry_count' => $this->attempts() - 1,
]);
// Store execution ID for timeout handling
$this->executionId = $this->task_log->id;
$this->server = $this->resource->destination->server;
if ($this->resource->type() === 'application') {
@ -129,15 +160,101 @@ public function handle(): void
'message' => $this->task_output ?? $e->getMessage(),
]);
}
$this->team?->notify(new TaskFailed($this->task, $e->getMessage()));
// Log the error to the scheduled-errors channel
Log::channel('scheduled-errors')->error('ScheduledTask execution failed', [
'job' => 'ScheduledTaskJob',
'task_id' => $this->task->uuid,
'task_name' => $this->task->name,
'server' => $this->server->name ?? 'unknown',
'attempt' => $this->attempts(),
'error' => $e->getMessage(),
]);
// Re-throw on every failed attempt so Laravel's retry mechanism (with backoff) can run;
// handling of the permanent failure lives in failed()
throw $e;
} finally {
ScheduledTaskDone::dispatch($this->team->id);
if ($this->task_log) {
$finishedAt = Carbon::now();
$duration = round($startTime->floatDiffInSeconds($finishedAt), 2);
$this->task_log->update([
'finished_at' => Carbon::now()->toImmutable(),
'finished_at' => $finishedAt->toImmutable(),
'duration' => $duration,
]);
}
}
}
/**
 * Delays, in seconds, applied between successive retries of this job.
 *
 * NOTE(review): the class declares $tries = 3 and $maxExceptions = 1, so presumably not
 * every entry can be used and an unhandled exception may fail the job before any retry -
 * confirm against Laravel's queue worker behaviour.
 *
 * @return array<int, int>
 */
public function backoff(): array
{
    $retryDelays = [30, 60, 120]; // 30s, 60s, 120s

    return $retryDelays;
}
/**
 * Handle a permanent job failure: log it, mark the matching execution record as failed
 * and notify the team.
 *
 * @param \Throwable|null $exception Exception that caused the failure, if any
 */
public function failed(?\Throwable $exception): void
{
    $reason = $exception?->getMessage();

    Log::channel('scheduled-errors')->error('ScheduledTask permanently failed', [
        'job' => 'ScheduledTaskJob',
        'task_id' => $this->task->uuid,
        'task_name' => $this->task->name,
        'server' => $this->server->name ?? 'unknown',
        'total_attempts' => $this->attempts(),
        'error' => $reason,
        'trace' => $exception?->getTraceAsString(),
    ]);

    // Locate the execution record to update, in order of preference:
    //   1. the execution ID stored by handle() (may be unset when failed() runs on a
    //      freshly restored job instance, e.g. after a timeout),
    //   2. the most recently created execution for this task,
    //   3. the in-memory task_log, if any.
    $execution = $this->executionId
        ? ScheduledTaskExecution::find($this->executionId)
        : null;

    $execution ??= ScheduledTaskExecution::query()
        ->where('scheduled_task_id', $this->task->id)
        ->orderBy('created_at', 'desc')
        ->first();

    $execution ??= $this->task_log;

    if (! $execution) {
        Log::channel('scheduled-errors')->warning('Could not find execution log to update', [
            'execution_id' => $this->executionId,
            'task_id' => $this->task->uuid,
        ]);
    } else {
        $detail = $exception ? ': '.$exception->getMessage() : '';
        $errorMessage = 'Job permanently failed after '.$this->attempts().' attempts'.$detail;

        $execution->update([
            'status' => 'failed',
            'message' => $errorMessage,
            'error_details' => $exception?->getTraceAsString(),
            'finished_at' => Carbon::now()->toImmutable(),
        ]);
    }

    // Notify team about permanent failure
    $this->team?->notify(new TaskFailed($this->task, $reason ?? 'Unknown error'));
}
}

View file

@ -79,7 +79,7 @@ class BackupEdit extends Component
#[Validate(['required', 'boolean'])]
public bool $dumpAll = false;
#[Validate(['required', 'int', 'min:1', 'max:36000'])]
#[Validate(['required', 'int', 'min:60', 'max:36000'])]
public int $timeout = 3600;
public function mount()

View file

@ -34,11 +34,14 @@ class Add extends Component
public ?string $container = '';
public int $timeout = 300;
protected $rules = [
'name' => 'required|string',
'command' => 'required|string',
'frequency' => 'required|string',
'container' => 'nullable|string',
'timeout' => 'required|integer|min:60|max:3600',
];
protected $validationAttributes = [
@ -46,6 +49,7 @@ class Add extends Component
'command' => 'command',
'frequency' => 'frequency',
'container' => 'container',
'timeout' => 'timeout',
];
public function mount()
@ -103,6 +107,7 @@ public function saveScheduledTask()
$task->command = $this->command;
$task->frequency = $this->frequency;
$task->container = $this->container;
$task->timeout = $this->timeout;
$task->team_id = currentTeam()->id;
switch ($this->type) {
@ -130,5 +135,6 @@ public function clear()
$this->command = '';
$this->frequency = '';
$this->container = '';
$this->timeout = 300;
}
}

View file

@ -40,6 +40,9 @@ class Show extends Component
#[Validate(['string', 'nullable'])]
public ?string $container = null;
#[Validate(['integer', 'required', 'min:60', 'max:3600'])]
public int $timeout = 300;
#[Locked]
public ?string $application_uuid;
@ -99,6 +102,7 @@ public function syncData(bool $toModel = false)
$this->task->command = str($this->command)->trim()->value();
$this->task->frequency = str($this->frequency)->trim()->value();
$this->task->container = str($this->container)->trim()->value();
$this->task->timeout = $this->timeout;
$this->task->save();
} else {
$this->isEnabled = $this->task->enabled;
@ -106,6 +110,7 @@ public function syncData(bool $toModel = false)
$this->command = $this->task->command;
$this->frequency = $this->task->frequency;
$this->container = $this->task->container;
$this->timeout = $this->task->timeout ?? 300;
}
}

View file

@ -12,6 +12,14 @@ class ScheduledTask extends BaseModel
protected $guarded = [];
/**
 * Attribute casts for this model.
 *
 * @return array<string, string>
 */
protected function casts(): array
{
    $casts = [];
    $casts['enabled'] = 'boolean';
    $casts['timeout'] = 'integer';

    return $casts;
}
public function service()
{
return $this->belongsTo(Service::class);

View file

@ -8,6 +8,16 @@ class ScheduledTaskExecution extends BaseModel
{
protected $guarded = [];
/**
 * Attribute casts for this model.
 *
 * @return array<string, string>
 */
protected function casts(): array
{
    $casts = [];

    // Execution timestamps
    $casts['started_at'] = 'datetime';
    $casts['finished_at'] = 'datetime';

    // Retry counter and duration with two decimal places
    $casts['retry_count'] = 'integer';
    $casts['duration'] = 'decimal:2';

    return $casts;
}
public function scheduledTask(): BelongsTo
{
return $this->belongsTo(ScheduledTask::class);

View file

@ -129,8 +129,8 @@
'scheduled-errors' => [
'driver' => 'daily',
'path' => storage_path('logs/scheduled-errors.log'),
'level' => 'debug',
'days' => 7,
'level' => 'warning',
'days' => 14,
],
],

View file

@ -0,0 +1,28 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Add the integer "timeout" column (default 300) to scheduled_tasks.
     */
    public function up(): void
    {
        Schema::table('scheduled_tasks', static function (Blueprint $table): void {
            $timeoutColumn = $table->integer('timeout');
            $timeoutColumn->default(300)->after('frequency');
        });
    }

    /**
     * Drop the "timeout" column again.
     */
    public function down(): void
    {
        Schema::table('scheduled_tasks', static function (Blueprint $table): void {
            $table->dropColumn('timeout');
        });
    }
};

View file

@ -0,0 +1,31 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Add start time, retry count, duration and error details to scheduled_task_executions.
     */
    public function up(): void
    {
        Schema::table('scheduled_task_executions', static function (Blueprint $table): void {
            // When the execution started
            $table->timestamp('started_at')->nullable()->after('scheduled_task_id');

            // Retry counter, starting at 0
            $table->integer('retry_count')->default(0)->after('status');

            // Run time in seconds, two decimal places
            $table->decimal('duration', 10, 2)->nullable()->after('retry_count')->comment('Duration in seconds');

            // Free-form error details
            $table->text('error_details')->nullable()->after('message');
        });
    }

    /**
     * Drop the columns added by up().
     */
    public function down(): void
    {
        $addedColumns = ['started_at', 'retry_count', 'duration', 'error_details'];

        Schema::table('scheduled_task_executions', static function (Blueprint $table) use ($addedColumns): void {
            $table->dropColumn($addedColumns);
        });
    }
};

View file

@ -4,6 +4,9 @@
<x-forms.input placeholder="0 0 * * * or daily"
helper="You can use every_minute, hourly, daily, weekly, monthly, yearly or a cron expression." id="frequency"
label="Frequency" />
<x-forms.input type="number" placeholder="300" id="timeout"
helper="Maximum execution time in seconds (60-3600). Default is 300 seconds (5 minutes)."
label="Timeout (seconds)" />
@if ($type === 'application')
@if ($containerNames->count() > 1)
<x-forms.select id="container" label="Container name">

View file

@ -35,6 +35,8 @@
<x-forms.input placeholder="Name" id="name" label="Name" required />
<x-forms.input placeholder="php artisan schedule:run" id="command" label="Command" required />
<x-forms.input placeholder="0 0 * * * or daily" id="frequency" label="Frequency" required />
<x-forms.input type="number" placeholder="300" id="timeout"
helper="Maximum execution time in seconds (60-3600)." label="Timeout (seconds)" required />
@if ($type === 'application')
<x-forms.input placeholder="php"
helper="You can leave this empty if your resource only has one container." id="container"

View file

@ -0,0 +1,70 @@
<?php
use App\Jobs\CoolifyTask;
use App\Models\Server;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Support\Facades\Queue;
uses(RefreshDatabase::class);
it('can dispatch CoolifyTask successfully', function () {
    // A server record is required; skip when none is available
    $server = Server::where('ip', '!=', '1.2.3.4')->first();
    if ($server === null) {
        $this->markTestSkipped('No servers available for testing');
    }

    Queue::fake();

    // Activity the task will operate on
    $taskProperties = [
        'server_uuid' => $server->uuid,
        'command' => 'echo "test"',
        'type' => 'inline',
    ];
    $activity = activity()
        ->withProperties($taskProperties)
        ->event('inline')
        ->log('[]');

    // Push the job onto the faked queue
    CoolifyTask::dispatch(
        activity: $activity,
        ignore_errors: false,
        call_event_on_finish: null,
        call_event_data: null
    );

    // The fake queue must have received it
    Queue::assertPushed(CoolifyTask::class);
});
it('has correct retry configuration on CoolifyTask', function () {
    // A server record is required; skip when none is available
    $server = Server::where('ip', '!=', '1.2.3.4')->first();
    if ($server === null) {
        $this->markTestSkipped('No servers available for testing');
    }

    $taskProperties = [
        'server_uuid' => $server->uuid,
        'command' => 'echo "test"',
        'type' => 'inline',
    ];
    $activity = activity()
        ->withProperties($taskProperties)
        ->event('inline')
        ->log('[]');

    $job = new CoolifyTask(
        activity: $activity,
        ignore_errors: false,
        call_event_on_finish: null,
        call_event_data: null
    );

    // Retry settings exposed by the job instance
    expect($job->tries)->toBe(3);
    expect($job->maxExceptions)->toBe(1);
    expect($job->timeout)->toBe(600);
    expect($job->backoff())->toBe([30, 90, 180]);
});

View file

@ -0,0 +1,216 @@
<?php
use App\Models\ScheduledDatabaseBackup;
use App\Models\ScheduledDatabaseBackupExecution;
use App\Models\ScheduledTask;
use App\Models\ScheduledTaskExecution;
use App\Models\StandalonePostgresql;
use App\Models\Team;
use Carbon\Carbon;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Support\Facades\Artisan;
use Illuminate\Support\Facades\Notification;
uses(RefreshDatabase::class);
beforeEach(function () {
    // Pin "now" so timestamp assertions are deterministic
    $frozenNow = '2025-01-15 12:00:00';
    Carbon::setTestNow($frozenNow);

    // Capture notifications instead of sending them
    Notification::fake();
});

afterEach(function () {
    // Release the frozen clock
    Carbon::setTestNow();
});
test('app:init marks stuck scheduled task executions as failed', function () {
    $interruptedMessage = 'Marked as failed during Coolify startup - job was interrupted';

    // A scheduled task owned by a fresh team
    $owner = Team::factory()->create();
    $task = ScheduledTask::factory()->create(['team_id' => $owner->id]);

    // Two executions still flagged as running
    $stuckOlder = ScheduledTaskExecution::create([
        'scheduled_task_id' => $task->id,
        'status' => 'running',
        'started_at' => Carbon::now()->subMinutes(10),
    ]);
    $stuckNewer = ScheduledTaskExecution::create([
        'scheduled_task_id' => $task->id,
        'status' => 'running',
        'started_at' => Carbon::now()->subMinutes(5),
    ]);

    // One finished execution that must stay untouched
    $finished = ScheduledTaskExecution::create([
        'scheduled_task_id' => $task->id,
        'status' => 'success',
        'started_at' => Carbon::now()->subMinutes(15),
        'finished_at' => Carbon::now()->subMinutes(14),
    ]);

    Artisan::call('app:init');

    // Reload all records from the database
    foreach ([$stuckOlder, $stuckNewer, $finished] as $execution) {
        $execution->refresh();
    }

    // Running executions are now failed
    expect($stuckOlder->status)->toBe('failed');
    expect($stuckOlder->message)->toBe($interruptedMessage);
    expect($stuckOlder->finished_at)->not->toBeNull();
    expect($stuckOlder->finished_at->toDateTimeString())->toBe('2025-01-15 12:00:00');

    expect($stuckNewer->status)->toBe('failed');
    expect($stuckNewer->message)->toBe($interruptedMessage);
    expect($stuckNewer->finished_at)->not->toBeNull();

    // The finished execution is unchanged
    expect($finished->status)->toBe('success');
    expect($finished->message)->toBeNull();

    // No notifications were sent
    Notification::assertNothingSent();
});
test('app:init marks stuck database backup executions as failed', function () {
    $interruptedMessage = 'Marked as failed during Coolify startup - job was interrupted';

    // Team, database and scheduled backup fixtures
    $owner = Team::factory()->create();
    $postgres = StandalonePostgresql::factory()->create(['team_id' => $owner->id]);
    $backup = ScheduledDatabaseBackup::factory()->create([
        'team_id' => $owner->id,
        'database_id' => $postgres->id,
        'database_type' => StandalonePostgresql::class,
    ]);

    // Two backup executions still flagged as running
    $stuckFirst = ScheduledDatabaseBackupExecution::create([
        'scheduled_database_backup_id' => $backup->id,
        'status' => 'running',
        'database_name' => 'test_db',
    ]);
    $stuckSecond = ScheduledDatabaseBackupExecution::create([
        'scheduled_database_backup_id' => $backup->id,
        'status' => 'running',
        'database_name' => 'test_db_2',
    ]);

    // One successful execution that must stay untouched
    $succeeded = ScheduledDatabaseBackupExecution::create([
        'scheduled_database_backup_id' => $backup->id,
        'status' => 'success',
        'database_name' => 'test_db_3',
        'finished_at' => Carbon::now()->subMinutes(20),
    ]);

    Artisan::call('app:init');

    // Reload all records from the database
    foreach ([$stuckFirst, $stuckSecond, $succeeded] as $execution) {
        $execution->refresh();
    }

    // Running backups are now failed
    expect($stuckFirst->status)->toBe('failed');
    expect($stuckFirst->message)->toBe($interruptedMessage);
    expect($stuckFirst->finished_at)->not->toBeNull();
    expect($stuckFirst->finished_at->toDateTimeString())->toBe('2025-01-15 12:00:00');

    expect($stuckSecond->status)->toBe('failed');
    expect($stuckSecond->message)->toBe($interruptedMessage);
    expect($stuckSecond->finished_at)->not->toBeNull();

    // The successful backup is unchanged
    expect($succeeded->status)->toBe('success');
    expect($succeeded->message)->toBeNull();

    // No notifications were sent
    Notification::assertNothingSent();
});
test('app:init handles cleanup when no stuck executions exist', function () {
    // A scheduled task owned by a fresh team
    $owner = Team::factory()->create();
    $task = ScheduledTask::factory()->create(['team_id' => $owner->id]);

    // Only already-finished executions: one success, one failure
    ScheduledTaskExecution::create([
        'scheduled_task_id' => $task->id,
        'status' => 'success',
        'started_at' => Carbon::now()->subMinutes(10),
        'finished_at' => Carbon::now()->subMinutes(9),
    ]);
    ScheduledTaskExecution::create([
        'scheduled_task_id' => $task->id,
        'status' => 'failed',
        'started_at' => Carbon::now()->subMinutes(20),
        'finished_at' => Carbon::now()->subMinutes(19),
    ]);

    // The command must complete without error
    $exitCode = Artisan::call('app:init');
    expect($exitCode)->toBe(0);

    // Status distribution is exactly what was seeded
    $countByStatus = fn (string $status) => ScheduledTaskExecution::where('status', $status)->count();
    expect($countByStatus('running'))->toBe(0);
    expect($countByStatus('success'))->toBe(1);
    expect($countByStatus('failed'))->toBe(1);

    // No notifications were sent
    Notification::assertNothingSent();
});
test('cleanup does not send notifications even when team has notification settings', function () {
    // Team with SMTP notification settings switched on
    $notifiableTeam = Team::factory()->create([
        'smtp_enabled' => true,
        'smtp_from_address' => 'test@example.com',
    ]);

    $task = ScheduledTask::factory()->create(['team_id' => $notifiableTeam->id]);

    // A single execution still flagged as running
    $stuckExecution = ScheduledTaskExecution::create([
        'scheduled_task_id' => $task->id,
        'status' => 'running',
        'started_at' => Carbon::now()->subMinutes(5),
    ]);

    Artisan::call('app:init');

    $stuckExecution->refresh();

    // The execution was failed by the startup cleanup
    expect($stuckExecution->status)->toBe('failed');

    // Nothing was sent, despite the team's notification settings
    Notification::assertNothingSent();
});

View file

@ -0,0 +1,77 @@
<?php
use App\Jobs\CoolifyTask;
use App\Jobs\DatabaseBackupJob;
use App\Jobs\ScheduledTaskJob;
it('CoolifyTask has correct retry properties defined', function () {
    $class = new ReflectionClass(CoolifyTask::class);

    // The retry-related members must be declared
    foreach (['tries', 'maxExceptions', 'timeout'] as $property) {
        expect($class->hasProperty($property))->toBeTrue();
    }
    expect($class->hasMethod('backoff'))->toBeTrue();

    // Defaults as written in the class definition
    $defaults = $class->getDefaultProperties();
    expect($defaults['tries'])->toBe(3);
    expect($defaults['maxExceptions'])->toBe(1);
    expect($defaults['timeout'])->toBe(600);
});
it('ScheduledTaskJob has correct retry properties defined', function () {
    $class = new ReflectionClass(ScheduledTaskJob::class);

    // The retry-related members must be declared
    foreach (['tries', 'maxExceptions', 'timeout'] as $property) {
        expect($class->hasProperty($property))->toBeTrue();
    }
    foreach (['backoff', 'failed'] as $method) {
        expect($class->hasMethod($method))->toBeTrue();
    }

    // Defaults as written in the class definition
    $defaults = $class->getDefaultProperties();
    expect($defaults['tries'])->toBe(3);
    expect($defaults['maxExceptions'])->toBe(1);
    expect($defaults['timeout'])->toBe(300);
});
it('DatabaseBackupJob has correct retry properties defined', function () {
    $reflection = new ReflectionClass(DatabaseBackupJob::class);

    // The retry settings must be declared AND public: they are read from
    // outside the job instance, so a protected/private declaration would
    // exist on the class yet never take effect.
    foreach (['tries', 'maxExceptions', 'timeout'] as $property) {
        expect($reflection->hasProperty($property))->toBeTrue();
        expect($reflection->getProperty($property)->isPublic())->toBeTrue();
    }

    // Retry delay and failure handling are provided as methods.
    expect($reflection->hasMethod('backoff'))->toBeTrue()
        ->and($reflection->hasMethod('failed'))->toBeTrue();

    // Default values as written in the class definition (no instance needed).
    $defaultProperties = $reflection->getDefaultProperties();
    expect($defaultProperties['tries'])->toBe(2)
        ->and($defaultProperties['maxExceptions'])->toBe(1)
        ->and($defaultProperties['timeout'])->toBe(3600);
});
it('DatabaseBackupJob enforces minimum timeout of 60 seconds', function () {
    // Locate the constructor inside the class's source file.
    $jobClass = new ReflectionClass(DatabaseBackupJob::class);
    $ctor = $jobClass->getMethod('__construct');
    $sourcePath = $jobClass->getFileName();
    $firstLine = $ctor->getStartLine();
    $lineCount = $ctor->getEndLine() - $firstLine + 1;

    // Cut out exactly the constructor's lines (reflection line numbers are 1-based).
    $fileLines = file($sourcePath);
    $ctorSource = implode('', array_slice($fileLines, $firstLine - 1, $lineCount));

    // Textual check only: the constructor source must mention both a max( call and the literal 60.
    expect($ctorSource)
        ->toContain('max(')
        ->toContain('60');
});

View file

@ -0,0 +1,96 @@
<?php
use App\Jobs\ScheduledTaskJob;
use Illuminate\Support\Facades\Log;
// Swap the Log facade for a spy before every test so nothing is actually logged.
beforeEach(fn () => Log::spy());
it('has executionId property for timeout handling', function () {
    $jobClass = new ReflectionClass(ScheduledTaskJob::class);

    // The property must be declared, and declared with protected visibility.
    expect($jobClass->hasProperty('executionId'))->toBeTrue()
        ->and($jobClass->getProperty('executionId')->isProtected())->toBeTrue();
});
it('has failed method that handles job failures', function () {
    $jobClass = new ReflectionClass(ScheduledTaskJob::class);

    // A failed() method must be declared on the job.
    expect($jobClass->hasMethod('failed'))->toBeTrue();

    // It takes exactly one parameter, named "exception", which may be null.
    // (Only the name and nullability are checked here, not the declared type.)
    $params = $jobClass->getMethod('failed')->getParameters();
    expect($params)->toHaveCount(1)
        ->and($params[0]->getName())->toBe('exception')
        ->and($params[0]->allowsNull())->toBeTrue();
});
it('failed method implementation reloads execution from database', function () {
    // Pull the raw source text of ScheduledTaskJob::failed() out of its file.
    $jobClass = new ReflectionClass(ScheduledTaskJob::class);
    $failed = $jobClass->getMethod('failed');
    $sourcePath = $jobClass->getFileName();
    $firstLine = $failed->getStartLine();
    $lineCount = $failed->getEndLine() - $firstLine + 1;
    $fileLines = file($sourcePath);
    $failedSource = implode('', array_slice($fileLines, $firstLine - 1, $lineCount));

    // Textual check: each fragment below must appear somewhere in the method body.
    $expectedFragments = [
        '$this->executionId',
        'ScheduledTaskExecution::find',
        'ScheduledTaskExecution::query',
        'scheduled_task_id',
        'orderBy',
        'status',
        'failed',
        'notify',
    ];
    foreach ($expectedFragments as $fragment) {
        expect($failedSource)->toContain($fragment);
    }
});
it('failed method updates execution with error_details field', function () {
    // Pull the raw source text of ScheduledTaskJob::failed() out of its file.
    $jobClass = new ReflectionClass(ScheduledTaskJob::class);
    $failed = $jobClass->getMethod('failed');
    $sourcePath = $jobClass->getFileName();
    $firstLine = $failed->getStartLine();
    $lineCount = $failed->getEndLine() - $firstLine + 1;
    $fileLines = file($sourcePath);
    $failedSource = implode('', array_slice($fileLines, $firstLine - 1, $lineCount));

    // Textual check: the method body must reference the error_details field.
    expect($failedSource)->toContain('error_details');
});
it('failed method logs when execution cannot be found', function () {
    // Pull the raw source text of ScheduledTaskJob::failed() out of its file.
    $jobClass = new ReflectionClass(ScheduledTaskJob::class);
    $failed = $jobClass->getMethod('failed');
    $sourcePath = $jobClass->getFileName();
    $firstLine = $failed->getStartLine();
    $lineCount = $failed->getEndLine() - $firstLine + 1;
    $fileLines = file($sourcePath);
    $failedSource = implode('', array_slice($fileLines, $firstLine - 1, $lineCount));

    // Textual check: the body must contain the "not found" message and the word "warning".
    foreach (['Could not find execution log', 'warning'] as $fragment) {
        expect($failedSource)->toContain($fragment);
    }
});

View file

@ -0,0 +1,116 @@
<?php
use App\Models\ScheduledDatabaseBackupExecution;
use App\Models\ScheduledTaskExecution;
use Carbon\Carbon;
// Freeze Carbon's clock so every Carbon::now() in a test yields the same instant.
beforeEach(fn () => Carbon::setTestNow('2025-01-15 12:00:00'));
afterEach(function () {
    // Release the frozen clock first, then close the Mockery container.
    Carbon::setTestNow(null);
    \Mockery::close();
});
it('marks stuck scheduled task executions as failed without triggering notifications', function () {
    // NOTE(review): this test invokes the mocked model directly rather than any
    // production cleanup routine, and asserts nothing about notifications -
    // confirm that is the intended coverage.

    // Stand in for the model class with a Mockery alias double.
    $executionAlias = \Mockery::mock('alias:'.ScheduledTaskExecution::class);

    // where('status', 'running') must be called exactly once and stay chainable.
    $executionAlias->shouldReceive('where')
        ->with('status', 'running')
        ->once()
        ->andReturnSelf();

    // update() must receive the failure payload exactly once; report 2 affected rows.
    $executionAlias->shouldReceive('update')
        ->with([
            'status' => 'failed',
            'message' => 'Marked as failed during Coolify startup - job was interrupted',
            'finished_at' => Carbon::now(),
        ])
        ->once()
        ->andReturn(2);

    // Issue the same query the expectations above describe.
    $affectedRows = ScheduledTaskExecution::where('status', 'running')->update([
        'status' => 'failed',
        'message' => 'Marked as failed during Coolify startup - job was interrupted',
        'finished_at' => Carbon::now(),
    ]);

    expect($affectedRows)->toBe(2);
});
it('marks stuck database backup executions as failed without triggering notifications', function () {
    // NOTE(review): this test invokes the mocked model directly rather than any
    // production cleanup routine, and asserts nothing about notifications -
    // confirm that is the intended coverage.

    // Stand in for the model class with a Mockery alias double.
    $backupAlias = \Mockery::mock('alias:'.ScheduledDatabaseBackupExecution::class);

    // where('status', 'running') must be called exactly once and stay chainable.
    $backupAlias->shouldReceive('where')
        ->with('status', 'running')
        ->once()
        ->andReturnSelf();

    // update() must receive the failure payload exactly once; report 3 affected rows.
    $backupAlias->shouldReceive('update')
        ->with([
            'status' => 'failed',
            'message' => 'Marked as failed during Coolify startup - job was interrupted',
            'finished_at' => Carbon::now(),
        ])
        ->once()
        ->andReturn(3);

    // Issue the same query the expectations above describe.
    $affectedRows = ScheduledDatabaseBackupExecution::where('status', 'running')->update([
        'status' => 'failed',
        'message' => 'Marked as failed during Coolify startup - job was interrupted',
        'finished_at' => Carbon::now(),
    ]);

    expect($affectedRows)->toBe(3);
});
it('handles cleanup when no stuck executions exist', function () {
    // Stand in for the model class with a Mockery alias double.
    $executionAlias = \Mockery::mock('alias:'.ScheduledTaskExecution::class);

    // where('status', 'running') must be called exactly once and stay chainable.
    $executionAlias->shouldReceive('where')
        ->with('status', 'running')
        ->once()
        ->andReturnSelf();

    // update() is expected once with any arguments and reports zero affected rows.
    $executionAlias->shouldReceive('update')
        ->once()
        ->andReturn(0);

    $affectedRows = ScheduledTaskExecution::where('status', 'running')->update([
        'status' => 'failed',
        'message' => 'Marked as failed during Coolify startup - job was interrupted',
        'finished_at' => Carbon::now(),
    ]);

    expect($affectedRows)->toBe(0);
});
it('uses correct failure message for interrupted jobs', function () {
    $failureMessage = 'Marked as failed during Coolify startup - job was interrupted';

    // The wording must mention the startup context, the interruption and the failed state.
    foreach (['Coolify startup', 'interrupted', 'failed'] as $fragment) {
        expect($failureMessage)->toContain($fragment);
    }
});
it('sets finished_at timestamp when marking executions as failed', function () {
    $frozenNow = Carbon::now();

    // Carbon::now() must return a Carbon instance...
    expect($frozenNow)->toBeInstanceOf(Carbon::class);

    // ...pinned to the instant set by Carbon::setTestNow() in beforeEach.
    expect($frozenNow->toDateTimeString())->toBe('2025-01-15 12:00:00');
});