coolify/app/Jobs/ScheduledTaskJob.php
Andras Bacsai b22e79caec feat(jobs): improve scheduled tasks with retry logic and queue cleanup
- Add retry configuration to CoolifyTask (3 tries, 600s timeout)
- Add retry configuration to ScheduledTaskJob (3 tries, configurable timeout)
- Add retry configuration to DatabaseBackupJob (2 tries)
- Implement exponential backoff for all jobs (30s, 60s, 120s intervals)
- Add failed() handlers with comprehensive error logging to scheduled-errors channel
- Add execution tracking: started_at, retry_count, duration (decimal), error_details
- Add configurable timeout field to scheduled tasks (60-3600s, default 300s)
- Update UI to include timeout configuration in task creation/editing forms
- Increase ScheduledJobManager lock expiration from 60s to 90s for high-load environments
- Implement safe queue cleanup with restart vs runtime modes
  - Restart mode: aggressive cleanup (marks all processing jobs as failed)
  - Runtime mode: conservative cleanup (only marks jobs >12h as failed, skips deployments)
- Add cleanup:redis --restart flag for system startup
- Integrate cleanup into Dev.php init() for development environment
- Increase scheduled-errors log retention from 7 to 14 days
- Create comprehensive test suite (unit and feature tests)
- Add TESTING_GUIDE.md with manual testing instructions

Fixes issues with jobs failing after single attempt and "attempted too many times" errors
2025-11-10 11:11:18 +01:00

221 lines
7.8 KiB
PHP

<?php
namespace App\Jobs;
use App\Events\ScheduledTaskDone;
use App\Exceptions\NonReportableException;
use App\Models\Application;
use App\Models\ScheduledTask;
use App\Models\ScheduledTaskExecution;
use App\Models\Server;
use App\Models\Service;
use App\Models\Team;
use App\Notifications\ScheduledTask\TaskFailed;
use App\Notifications\ScheduledTask\TaskSuccess;
use Carbon\Carbon;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
class ScheduledTaskJob implements ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
/**
* The number of times the job may be attempted.
*/
public $tries = 3;
/**
* The maximum number of unhandled exceptions to allow before failing.
*/
public $maxExceptions = 1;
/**
* The number of seconds the job can run before timing out.
*/
public $timeout = 300;
public Team $team;
public Server $server;
public ScheduledTask $task;
public Application|Service $resource;
public ?ScheduledTaskExecution $task_log = null;
public string $task_status = 'failed';
public ?string $task_output = null;
public array $containers = [];
public string $server_timezone;
public function __construct($task)
{
$this->onQueue('high');
$this->task = $task;
if ($service = $task->service()->first()) {
$this->resource = $service;
} elseif ($application = $task->application()->first()) {
$this->resource = $application;
} else {
throw new \RuntimeException('ScheduledTaskJob failed: No resource found.');
}
$this->team = Team::findOrFail($task->team_id);
$this->server_timezone = $this->getServerTimezone();
// Set timeout from task configuration
$this->timeout = $this->task->timeout ?? 300;
}
private function getServerTimezone(): string
{
if ($this->resource instanceof Application) {
return $this->resource->destination->server->settings->server_timezone;
} elseif ($this->resource instanceof Service) {
return $this->resource->server->settings->server_timezone;
}
return 'UTC';
}
public function handle(): void
{
$startTime = Carbon::now();
try {
$this->task_log = ScheduledTaskExecution::create([
'scheduled_task_id' => $this->task->id,
'started_at' => $startTime,
'retry_count' => $this->attempts() - 1,
]);
$this->server = $this->resource->destination->server;
if ($this->resource->type() === 'application') {
$containers = getCurrentApplicationContainerStatus($this->server, $this->resource->id, 0);
if ($containers->count() > 0) {
$containers->each(function ($container) {
$this->containers[] = str_replace('/', '', $container['Names']);
});
}
} elseif ($this->resource->type() === 'service') {
$this->resource->applications()->get()->each(function ($application) {
if (str(data_get($application, 'status'))->contains('running')) {
$this->containers[] = data_get($application, 'name').'-'.data_get($this->resource, 'uuid');
}
});
$this->resource->databases()->get()->each(function ($database) {
if (str(data_get($database, 'status'))->contains('running')) {
$this->containers[] = data_get($database, 'name').'-'.data_get($this->resource, 'uuid');
}
});
}
if (count($this->containers) == 0) {
throw new \Exception('ScheduledTaskJob failed: No containers running.');
}
if (count($this->containers) > 1 && empty($this->task->container)) {
throw new \Exception('ScheduledTaskJob failed: More than one container exists but no container name was provided.');
}
foreach ($this->containers as $containerName) {
if (count($this->containers) == 1 || str_starts_with($containerName, $this->task->container.'-'.$this->resource->uuid)) {
$cmd = "sh -c '".str_replace("'", "'\''", $this->task->command)."'";
$exec = "docker exec {$containerName} {$cmd}";
$this->task_output = instant_remote_process([$exec], $this->server, true);
$this->task_log->update([
'status' => 'success',
'message' => $this->task_output,
]);
$this->team?->notify(new TaskSuccess($this->task, $this->task_output));
return;
}
}
// No valid container was found.
throw new NonReportableException('ScheduledTaskJob failed: No valid container was found. Is the container name correct?');
} catch (\Throwable $e) {
if ($this->task_log) {
$this->task_log->update([
'status' => 'failed',
'message' => $this->task_output ?? $e->getMessage(),
]);
}
// Log the error to the scheduled-errors channel
Log::channel('scheduled-errors')->error('ScheduledTask execution failed', [
'job' => 'ScheduledTaskJob',
'task_id' => $this->task->uuid,
'task_name' => $this->task->name,
'server' => $this->server->name ?? 'unknown',
'attempt' => $this->attempts(),
'error' => $e->getMessage(),
]);
// Only notify and throw on final failure
if ($this->attempts() >= $this->tries) {
$this->team?->notify(new TaskFailed($this->task, $e->getMessage()));
}
// Re-throw to trigger Laravel's retry mechanism with backoff
throw $e;
} finally {
ScheduledTaskDone::dispatch($this->team->id);
if ($this->task_log) {
$finishedAt = Carbon::now();
$duration = round($startTime->floatDiffInSeconds($finishedAt), 2);
$this->task_log->update([
'finished_at' => $finishedAt->toImmutable(),
'duration' => $duration,
]);
}
}
}
/**
* Calculate the number of seconds to wait before retrying the job.
*/
public function backoff(): array
{
return [30, 60, 120]; // 30s, 60s, 120s between retries
}
/**
* Handle a job failure.
*/
public function failed(?\Throwable $exception): void
{
Log::channel('scheduled-errors')->error('ScheduledTask permanently failed', [
'job' => 'ScheduledTaskJob',
'task_id' => $this->task->uuid,
'task_name' => $this->task->name,
'server' => $this->server->name ?? 'unknown',
'total_attempts' => $this->attempts(),
'error' => $exception?->getMessage(),
'trace' => $exception?->getTraceAsString(),
]);
if ($this->task_log) {
$this->task_log->update([
'status' => 'failed',
'message' => 'Job permanently failed after '.$this->attempts().' attempts: '.($exception?->getMessage() ?? 'Unknown error'),
'finished_at' => Carbon::now()->toImmutable(),
]);
}
// Notify team about permanent failure
$this->team?->notify(new TaskFailed($this->task, $exception?->getMessage() ?? 'Unknown error'));
}
}