fix(backups): enforce retention and clean up stale executions

Add `WithoutOverlapping` middleware to `DatabaseBackupJob` keyed by backup ID
with timeout-based lock expiry to prevent concurrent runs.

Mark long-running backup executions as failed when they exceed the stale
time threshold, and add periodic retention enforcement in
`CleanupInstanceStuffsJob` with cache-based throttling.

Also add float casts for retention max-storage fields on
`ScheduledDatabaseBackup` and comprehensive feature tests covering
overlap middleware, stale detection, casts, and retention behavior.
This commit is contained in:
Andras Bacsai 2026-04-03 11:33:21 +02:00
parent 968508583d
commit ffb5045c6a
4 changed files with 319 additions and 0 deletions

View file

@ -2,6 +2,7 @@
namespace App\Jobs;
use App\Models\ScheduledDatabaseBackup;
use App\Models\TeamInvitation;
use App\Models\User;
use Illuminate\Bus\Queueable;
@ -12,6 +13,7 @@
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\Middleware\WithoutOverlapping;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\Log;
class CleanupInstanceStuffsJob implements ShouldBeEncrypted, ShouldBeUnique, ShouldQueue
@ -32,6 +34,7 @@ public function handle(): void
try {
$this->cleanupInvitationLink();
$this->cleanupExpiredEmailChangeRequests();
$this->enforceBackupRetention();
} catch (\Throwable $e) {
Log::error('CleanupInstanceStuffsJob failed with error: '.$e->getMessage());
}
@ -55,4 +58,25 @@ private function cleanupExpiredEmailChangeRequests()
'email_change_code_expires_at' => null,
]);
}
private function enforceBackupRetention(): void
{
if (! Cache::add('backup-retention-enforcement', true, 1800)) {
return;
}
try {
$backups = ScheduledDatabaseBackup::where('enabled', true)->get();
foreach ($backups as $backup) {
try {
removeOldBackups($backup);
} catch (\Throwable $e) {
Log::warning('Failed to enforce retention for backup '.$backup->id.': '.$e->getMessage());
}
}
} catch (\Throwable $e) {
Log::error('Failed to enforce backup retention: '.$e->getMessage());
Cache::forget('backup-retention-enforcement');
}
}
}

View file

@ -22,6 +22,7 @@
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\Middleware\WithoutOverlapping;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Str;
@ -80,6 +81,13 @@ public function __construct(public ScheduledDatabaseBackup $backup)
$this->timeout = $backup->timeout ?? 3600;
}
public function middleware(): array
{
$expireAfter = ($this->backup->timeout ?? 3600) + 300;
return [(new WithoutOverlapping('database-backup-'.$this->backup->id))->expireAfter($expireAfter)->dontRelease()];
}
public function handle(): void
{
try {
@ -107,6 +115,8 @@ public function handle(): void
throw new \Exception('Database not found?!');
}
$this->markStaleExecutionsAsFailed();
BackupCreated::dispatch($this->team->id);
$status = str(data_get($this->database, 'status'));
@ -727,6 +737,31 @@ private function getFullImageName(): string
return "{$helperImage}:{$latestVersion}";
}
private function markStaleExecutionsAsFailed(): void
{
try {
$timeoutSeconds = ($this->backup->timeout ?? 3600) * 2;
$staleExecutions = $this->backup->executions()
->where('status', 'running')
->where('created_at', '<', now()->subSeconds($timeoutSeconds))
->get();
foreach ($staleExecutions as $execution) {
$execution->update([
'status' => 'failed',
'message' => 'Marked as failed - backup execution exceeded maximum allowed time',
'finished_at' => now(),
]);
}
} catch (Throwable $e) {
Log::channel('scheduled-errors')->warning('Failed to clean up stale backup executions', [
'backup_id' => $this->backup->uuid,
'error' => $e->getMessage(),
]);
}
}
public function failed(?Throwable $exception): void
{
Log::channel('scheduled-errors')->error('DatabaseBackup permanently failed', [

View file

@ -8,6 +8,14 @@
class ScheduledDatabaseBackup extends BaseModel
{
protected function casts(): array
{
return [
'database_backup_retention_max_storage_locally' => 'float',
'database_backup_retention_max_storage_s3' => 'float',
];
}
protected $fillable = [
'uuid',
'team_id',

View file

@ -0,0 +1,252 @@
<?php
use App\Jobs\CleanupInstanceStuffsJob;
use App\Jobs\DatabaseBackupJob;
use App\Models\ScheduledDatabaseBackup;
use App\Models\ScheduledDatabaseBackupExecution;
use App\Models\Team;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Queue\Middleware\WithoutOverlapping;
use Illuminate\Support\Facades\Cache;
uses(RefreshDatabase::class);
// --- WithoutOverlapping middleware on DatabaseBackupJob ---
test('database backup job has WithoutOverlapping middleware keyed by backup id', function () {
$team = Team::factory()->create();
$backup = ScheduledDatabaseBackup::create([
'frequency' => '0 0 * * *',
'save_s3' => false,
'database_type' => 'App\Models\StandalonePostgresql',
'database_id' => 1,
'team_id' => $team->id,
]);
$job = new DatabaseBackupJob($backup);
$middleware = $job->middleware();
expect($middleware)->toHaveCount(1);
expect($middleware[0])->toBeInstanceOf(WithoutOverlapping::class);
});
test('database backup job middleware uses custom timeout plus buffer', function () {
$team = Team::factory()->create();
$backup = ScheduledDatabaseBackup::create([
'frequency' => '0 0 * * *',
'save_s3' => false,
'database_type' => 'App\Models\StandalonePostgresql',
'database_id' => 1,
'team_id' => $team->id,
'timeout' => 7200,
]);
$job = new DatabaseBackupJob($backup);
$middleware = $job->middleware();
$reflection = new ReflectionClass($middleware[0]);
$expiresAfterProp = $reflection->getProperty('expiresAfter');
$releaseAfterProp = $reflection->getProperty('releaseAfter');
expect($expiresAfterProp->getValue($middleware[0]))->toBe(7500); // 7200 + 300
expect($releaseAfterProp->getValue($middleware[0]))->toBeNull(); // dontRelease sets to null
});
// --- Stale backup detection in DatabaseBackupJob ---
test('markStaleExecutionsAsFailed marks stale running executions as failed', function () {
$team = Team::factory()->create();
$backup = ScheduledDatabaseBackup::create([
'frequency' => '0 0 * * *',
'save_s3' => false,
'database_type' => 'App\Models\StandalonePostgresql',
'database_id' => 1,
'team_id' => $team->id,
'timeout' => 3600,
]);
// Create a stale execution (3 hours old, timeout is 1h, threshold is 2h)
$staleExecution = ScheduledDatabaseBackupExecution::create([
'uuid' => 'stale-exec-uuid',
'database_name' => 'test_db',
'scheduled_database_backup_id' => $backup->id,
'status' => 'running',
]);
ScheduledDatabaseBackupExecution::where('id', $staleExecution->id)
->update(['created_at' => now()->subHours(3)]);
// Call the private method via reflection
$job = new DatabaseBackupJob($backup);
$reflection = new ReflectionClass($job);
$method = $reflection->getMethod('markStaleExecutionsAsFailed');
$method->invoke($job);
$staleExecution->refresh();
expect($staleExecution->status)->toBe('failed');
expect($staleExecution->message)->toContain('exceeded maximum allowed time');
expect($staleExecution->finished_at)->not->toBeNull();
});
test('markStaleExecutionsAsFailed does not mark recent running executions as failed', function () {
$team = Team::factory()->create();
$backup = ScheduledDatabaseBackup::create([
'frequency' => '0 0 * * *',
'save_s3' => false,
'database_type' => 'App\Models\StandalonePostgresql',
'database_id' => 1,
'team_id' => $team->id,
'timeout' => 3600,
]);
// Create a recent execution (30 minutes old, threshold is 2 hours)
$recentExecution = ScheduledDatabaseBackupExecution::create([
'uuid' => 'recent-exec-uuid',
'database_name' => 'test_db',
'scheduled_database_backup_id' => $backup->id,
'status' => 'running',
]);
ScheduledDatabaseBackupExecution::where('id', $recentExecution->id)
->update(['created_at' => now()->subMinutes(30)]);
$job = new DatabaseBackupJob($backup);
$reflection = new ReflectionClass($job);
$method = $reflection->getMethod('markStaleExecutionsAsFailed');
$method->invoke($job);
$recentExecution->refresh();
expect($recentExecution->status)->toBe('running');
expect($recentExecution->finished_at)->toBeNull();
});
// --- Decimal cast on ScheduledDatabaseBackup model ---
test('scheduled database backup model casts max storage fields to float', function () {
$model = new ScheduledDatabaseBackup;
$casts = $model->getCasts();
expect($casts)->toHaveKey('database_backup_retention_max_storage_locally');
expect($casts['database_backup_retention_max_storage_locally'])->toBe('float');
expect($casts)->toHaveKey('database_backup_retention_max_storage_s3');
expect($casts['database_backup_retention_max_storage_s3'])->toBe('float');
});
test('max storage retention field returns float zero that passes strict comparison', function () {
$team = Team::factory()->create();
$backup = ScheduledDatabaseBackup::create([
'frequency' => '0 0 * * *',
'save_s3' => false,
'database_type' => 'App\Models\StandalonePostgresql',
'database_id' => 1,
'team_id' => $team->id,
'database_backup_retention_max_storage_locally' => 0,
]);
$backup->refresh();
// With the float cast, strict comparison to 0.0 works
expect($backup->database_backup_retention_max_storage_locally)->toBe(0.0);
// And loose comparison to int 0 also works
expect($backup->database_backup_retention_max_storage_locally == 0)->toBeTrue();
});
// --- Periodic retention enforcement in CleanupInstanceStuffsJob ---
test('cleanup instance stuffs job calls retention enforcement without errors', function () {
$team = Team::factory()->create();
$backup = ScheduledDatabaseBackup::create([
'frequency' => '0 0 * * *',
'save_s3' => false,
'enabled' => true,
'database_type' => 'App\Models\StandalonePostgresql',
'database_id' => 1,
'team_id' => $team->id,
'database_backup_retention_amount_locally' => 1,
]);
// Create 3 successful executions
foreach (range(1, 3) as $i) {
ScheduledDatabaseBackupExecution::create([
'uuid' => "exec-uuid-{$i}",
'database_name' => 'test_db',
'filename' => "/backup/test_{$i}.dmp",
'scheduled_database_backup_id' => $backup->id,
'status' => 'success',
'size' => 1024,
'local_storage_deleted' => false,
'created_at' => now()->subDays($i),
]);
}
Cache::forget('backup-retention-enforcement');
// Should not throw — the job gracefully handles missing servers
$job = new CleanupInstanceStuffsJob;
$job->handle();
// All 3 executions should still exist (no server to delete files from)
expect($backup->executions()->count())->toBe(3);
});
test('deleteOldBackupsLocally identifies correct backups for deletion by retention count', function () {
$team = Team::factory()->create();
$backup = ScheduledDatabaseBackup::create([
'frequency' => '0 0 * * *',
'save_s3' => false,
'enabled' => true,
'database_type' => 'App\Models\StandalonePostgresql',
'database_id' => 1,
'team_id' => $team->id,
'database_backup_retention_amount_locally' => 1,
]);
foreach (range(1, 3) as $i) {
$exec = ScheduledDatabaseBackupExecution::create([
'uuid' => "exec-uuid-{$i}",
'database_name' => 'test_db',
'filename' => "/backup/test_{$i}.dmp",
'scheduled_database_backup_id' => $backup->id,
'status' => 'success',
'size' => 1024,
'local_storage_deleted' => false,
]);
ScheduledDatabaseBackupExecution::where('id', $exec->id)
->update(['created_at' => now()->subDays($i)]);
}
// Test the selection logic directly
$successfulBackups = $backup->executions()
->where('status', 'success')
->where('local_storage_deleted', false)
->orderBy('created_at', 'desc')
->get();
$retentionAmount = $backup->database_backup_retention_amount_locally;
$backupsToDelete = $successfulBackups->skip($retentionAmount);
// exec-uuid-1 is newest (1 day ago), exec-uuid-2 and exec-uuid-3 should be deleted
expect($backupsToDelete)->toHaveCount(2);
expect($successfulBackups->first()->uuid)->toBe('exec-uuid-1');
});
test('cleanup instance stuffs job throttles retention enforcement via cache', function () {
Cache::forget('backup-retention-enforcement');
$job = new CleanupInstanceStuffsJob;
$job->handle();
// The cache key should now exist
expect(Cache::has('backup-retention-enforcement'))->toBeTrue();
// Running again should skip retention enforcement (cache hit)
// We verify by checking the lock is still held — no error thrown
$job->handle();
expect(Cache::has('backup-retention-enforcement'))->toBeTrue();
});