From ffb5045c6ab7d02b8b2c2108cb179e02b931ac32 Mon Sep 17 00:00:00 2001 From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com> Date: Fri, 3 Apr 2026 11:33:21 +0200 Subject: [PATCH] fix(backups): enforce retention and clean up stale executions Add `WithoutOverlapping` middleware to `DatabaseBackupJob` keyed by backup ID with timeout-based lock expiry to prevent concurrent runs. Mark long-running backup executions as failed when they exceed the stale time threshold, and add periodic retention enforcement in `CleanupInstanceStuffsJob` with cache-based throttling. Also add float casts for retention max-storage fields on `ScheduledDatabaseBackup` and comprehensive feature tests covering overlap middleware, stale detection, casts, and retention behavior. --- app/Jobs/CleanupInstanceStuffsJob.php | 24 ++ app/Jobs/DatabaseBackupJob.php | 35 +++ app/Models/ScheduledDatabaseBackup.php | 8 + .../BackupRetentionAndStaleDetectionTest.php | 252 ++++++++++++++++++ 4 files changed, 319 insertions(+) create mode 100644 tests/Feature/BackupRetentionAndStaleDetectionTest.php diff --git a/app/Jobs/CleanupInstanceStuffsJob.php b/app/Jobs/CleanupInstanceStuffsJob.php index 011c58639..e37a39c3d 100644 --- a/app/Jobs/CleanupInstanceStuffsJob.php +++ b/app/Jobs/CleanupInstanceStuffsJob.php @@ -2,6 +2,7 @@ namespace App\Jobs; +use App\Models\ScheduledDatabaseBackup; use App\Models\TeamInvitation; use App\Models\User; use Illuminate\Bus\Queueable; @@ -12,6 +13,7 @@ use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\Middleware\WithoutOverlapping; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Cache; use Illuminate\Support\Facades\Log; class CleanupInstanceStuffsJob implements ShouldBeEncrypted, ShouldBeUnique, ShouldQueue @@ -32,6 +34,7 @@ public function handle(): void try { $this->cleanupInvitationLink(); $this->cleanupExpiredEmailChangeRequests(); + $this->enforceBackupRetention(); } catch (\Throwable $e) { Log::error('CleanupInstanceStuffsJob failed with error: '.$e->getMessage()); } @@ -55,4 +58,25 @@ private function cleanupExpiredEmailChangeRequests() 'email_change_code_expires_at' => null, ]); } + + private function enforceBackupRetention(): void + { + if (! Cache::add('backup-retention-enforcement', true, 1800)) { + return; + } + + try { + $backups = ScheduledDatabaseBackup::where('enabled', true)->get(); + foreach ($backups as $backup) { + try { + removeOldBackups($backup); + } catch (\Throwable $e) { + Log::warning('Failed to enforce retention for backup '.$backup->id.': '.$e->getMessage()); + } + } + } catch (\Throwable $e) { + Log::error('Failed to enforce backup retention: '.$e->getMessage()); + Cache::forget('backup-retention-enforcement'); + } + } } diff --git a/app/Jobs/DatabaseBackupJob.php b/app/Jobs/DatabaseBackupJob.php index a2d08e1e8..207191cbd 100644 --- a/app/Jobs/DatabaseBackupJob.php +++ b/app/Jobs/DatabaseBackupJob.php @@ -22,6 +22,7 @@ use Illuminate\Contracts\Queue\ShouldQueue; use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; +use Illuminate\Queue\Middleware\WithoutOverlapping; use Illuminate\Queue\SerializesModels; use Illuminate\Support\Facades\Log; use Illuminate\Support\Str; @@ -80,6 +81,13 @@ public function __construct(public ScheduledDatabaseBackup $backup) $this->timeout = $backup->timeout ?? 3600; } + public function middleware(): array + { + $expireAfter = ($this->backup->timeout ?? 3600) + 300; + + return [(new WithoutOverlapping('database-backup-'.$this->backup->id))->expireAfter($expireAfter)->dontRelease()]; + } + public function handle(): void { try { @@ -107,6 +115,8 @@ public function handle(): void throw new \Exception('Database not found?!'); } + $this->markStaleExecutionsAsFailed(); + BackupCreated::dispatch($this->team->id); $status = str(data_get($this->database, 'status')); @@ -727,6 +737,31 @@ private function getFullImageName(): string return "{$helperImage}:{$latestVersion}"; } + private function markStaleExecutionsAsFailed(): void + { + try { + $timeoutSeconds = ($this->backup->timeout ?? 3600) * 2; + + $staleExecutions = $this->backup->executions() + ->where('status', 'running') + ->where('created_at', '<', now()->subSeconds($timeoutSeconds)) + ->get(); + + foreach ($staleExecutions as $execution) { + $execution->update([ + 'status' => 'failed', + 'message' => 'Marked as failed - backup execution exceeded maximum allowed time', + 'finished_at' => now(), + ]); + } + } catch (Throwable $e) { + Log::channel('scheduled-errors')->warning('Failed to clean up stale backup executions', [ + 'backup_id' => $this->backup->uuid, + 'error' => $e->getMessage(), + ]); + } + } + public function failed(?Throwable $exception): void { Log::channel('scheduled-errors')->error('DatabaseBackup permanently failed', [ diff --git a/app/Models/ScheduledDatabaseBackup.php b/app/Models/ScheduledDatabaseBackup.php index 6308bae8b..4038c6288 100644 --- a/app/Models/ScheduledDatabaseBackup.php +++ b/app/Models/ScheduledDatabaseBackup.php @@ -8,6 +8,14 @@ class ScheduledDatabaseBackup extends BaseModel { + protected function casts(): array + { + return [ + 'database_backup_retention_max_storage_locally' => 'float', + 'database_backup_retention_max_storage_s3' => 'float', + ]; + } + protected $fillable = [ 'uuid', 'team_id', diff --git a/tests/Feature/BackupRetentionAndStaleDetectionTest.php b/tests/Feature/BackupRetentionAndStaleDetectionTest.php new file mode 100644 index 000000000..cd4a76feb --- /dev/null +++ b/tests/Feature/BackupRetentionAndStaleDetectionTest.php @@ -0,0 +1,252 @@ +create(); + + $backup = ScheduledDatabaseBackup::create([ + 'frequency' => '0 0 * * *', + 'save_s3' => false, + 'database_type' => 'App\Models\StandalonePostgresql', + 'database_id' => 1, + 'team_id' => $team->id, + ]); + + $job = new DatabaseBackupJob($backup); + $middleware = $job->middleware(); + + expect($middleware)->toHaveCount(1); + expect($middleware[0])->toBeInstanceOf(WithoutOverlapping::class); +}); + +test('database backup job middleware uses custom timeout plus buffer', function () { + $team = Team::factory()->create(); + + $backup = ScheduledDatabaseBackup::create([ + 'frequency' => '0 0 * * *', + 'save_s3' => false, + 'database_type' => 'App\Models\StandalonePostgresql', + 'database_id' => 1, + 'team_id' => $team->id, + 'timeout' => 7200, + ]); + + $job = new DatabaseBackupJob($backup); + $middleware = $job->middleware(); + + $reflection = new ReflectionClass($middleware[0]); + $expiresAfterProp = $reflection->getProperty('expiresAfter'); + $releaseAfterProp = $reflection->getProperty('releaseAfter'); + + expect($expiresAfterProp->getValue($middleware[0]))->toBe(7500); // 7200 + 300 + expect($releaseAfterProp->getValue($middleware[0]))->toBeNull(); // dontRelease sets to null +}); + +// --- Stale backup detection in DatabaseBackupJob --- + +test('markStaleExecutionsAsFailed marks stale running executions as failed', function () { + $team = Team::factory()->create(); + + $backup = ScheduledDatabaseBackup::create([ + 'frequency' => '0 0 * * *', + 'save_s3' => false, + 'database_type' => 'App\Models\StandalonePostgresql', + 'database_id' => 1, + 'team_id' => $team->id, + 'timeout' => 3600, + ]); + + // Create a stale execution (3 hours old, timeout is 1h, threshold is 2h) + $staleExecution = ScheduledDatabaseBackupExecution::create([ + 'uuid' => 'stale-exec-uuid', + 'database_name' => 'test_db', + 'scheduled_database_backup_id' => $backup->id, + 'status' => 'running', + ]); + ScheduledDatabaseBackupExecution::where('id', $staleExecution->id) + ->update(['created_at' => now()->subHours(3)]); + + // Call the private method via reflection + $job = new DatabaseBackupJob($backup); + $reflection = new ReflectionClass($job); + $method = $reflection->getMethod('markStaleExecutionsAsFailed'); + $method->invoke($job); + + $staleExecution->refresh(); + expect($staleExecution->status)->toBe('failed'); + expect($staleExecution->message)->toContain('exceeded maximum allowed time'); + expect($staleExecution->finished_at)->not->toBeNull(); +}); + +test('markStaleExecutionsAsFailed does not mark recent running executions as failed', function () { + $team = Team::factory()->create(); + + $backup = ScheduledDatabaseBackup::create([ + 'frequency' => '0 0 * * *', + 'save_s3' => false, + 'database_type' => 'App\Models\StandalonePostgresql', + 'database_id' => 1, + 'team_id' => $team->id, + 'timeout' => 3600, + ]); + + // Create a recent execution (30 minutes old, threshold is 2 hours) + $recentExecution = ScheduledDatabaseBackupExecution::create([ + 'uuid' => 'recent-exec-uuid', + 'database_name' => 'test_db', + 'scheduled_database_backup_id' => $backup->id, + 'status' => 'running', + ]); + ScheduledDatabaseBackupExecution::where('id', $recentExecution->id) + ->update(['created_at' => now()->subMinutes(30)]); + + $job = new DatabaseBackupJob($backup); + $reflection = new ReflectionClass($job); + $method = $reflection->getMethod('markStaleExecutionsAsFailed'); + $method->invoke($job); + + $recentExecution->refresh(); + expect($recentExecution->status)->toBe('running'); + expect($recentExecution->finished_at)->toBeNull(); +}); + +// --- Decimal cast on ScheduledDatabaseBackup model --- + +test('scheduled database backup model casts max storage fields to float', function () { + $model = new ScheduledDatabaseBackup; + $casts = $model->getCasts(); + + expect($casts)->toHaveKey('database_backup_retention_max_storage_locally'); + expect($casts['database_backup_retention_max_storage_locally'])->toBe('float'); + expect($casts)->toHaveKey('database_backup_retention_max_storage_s3'); + expect($casts['database_backup_retention_max_storage_s3'])->toBe('float'); +}); + +test('max storage retention field returns float zero that passes strict comparison', function () { + $team = Team::factory()->create(); + + $backup = ScheduledDatabaseBackup::create([ + 'frequency' => '0 0 * * *', + 'save_s3' => false, + 'database_type' => 'App\Models\StandalonePostgresql', + 'database_id' => 1, + 'team_id' => $team->id, + 'database_backup_retention_max_storage_locally' => 0, + ]); + + $backup->refresh(); + + // With the float cast, strict comparison to 0.0 works + expect($backup->database_backup_retention_max_storage_locally)->toBe(0.0); + // And loose comparison to int 0 also works + expect($backup->database_backup_retention_max_storage_locally == 0)->toBeTrue(); +}); + +// --- Periodic retention enforcement in CleanupInstanceStuffsJob --- + +test('cleanup instance stuffs job calls retention enforcement without errors', function () { + $team = Team::factory()->create(); + + $backup = ScheduledDatabaseBackup::create([ + 'frequency' => '0 0 * * *', + 'save_s3' => false, + 'enabled' => true, + 'database_type' => 'App\Models\StandalonePostgresql', + 'database_id' => 1, + 'team_id' => $team->id, + 'database_backup_retention_amount_locally' => 1, + ]); + + // Create 3 successful executions + foreach (range(1, 3) as $i) { + ScheduledDatabaseBackupExecution::create([ + 'uuid' => "exec-uuid-{$i}", + 'database_name' => 'test_db', + 'filename' => "/backup/test_{$i}.dmp", + 'scheduled_database_backup_id' => $backup->id, + 'status' => 'success', + 'size' => 1024, + 'local_storage_deleted' => false, + 'created_at' => now()->subDays($i), + ]); + } + + Cache::forget('backup-retention-enforcement'); + + // Should not throw — the job gracefully handles missing servers + $job = new CleanupInstanceStuffsJob; + $job->handle(); + + // All 3 executions should still exist (no server to delete files from) + expect($backup->executions()->count())->toBe(3); +}); + +test('deleteOldBackupsLocally identifies correct backups for deletion by retention count', function () { + $team = Team::factory()->create(); + + $backup = ScheduledDatabaseBackup::create([ + 'frequency' => '0 0 * * *', + 'save_s3' => false, + 'enabled' => true, + 'database_type' => 'App\Models\StandalonePostgresql', + 'database_id' => 1, + 'team_id' => $team->id, + 'database_backup_retention_amount_locally' => 1, + ]); + + foreach (range(1, 3) as $i) { + $exec = ScheduledDatabaseBackupExecution::create([ + 'uuid' => "exec-uuid-{$i}", + 'database_name' => 'test_db', + 'filename' => "/backup/test_{$i}.dmp", + 'scheduled_database_backup_id' => $backup->id, + 'status' => 'success', + 'size' => 1024, + 'local_storage_deleted' => false, + ]); + ScheduledDatabaseBackupExecution::where('id', $exec->id) + ->update(['created_at' => now()->subDays($i)]); + } + + // Test the selection logic directly + $successfulBackups = $backup->executions() + ->where('status', 'success') + ->where('local_storage_deleted', false) + ->orderBy('created_at', 'desc') + ->get(); + + $retentionAmount = $backup->database_backup_retention_amount_locally; + $backupsToDelete = $successfulBackups->skip($retentionAmount); + + // exec-uuid-1 is newest (1 day ago), exec-uuid-2 and exec-uuid-3 should be deleted + expect($backupsToDelete)->toHaveCount(2); + expect($successfulBackups->first()->uuid)->toBe('exec-uuid-1'); +}); + +test('cleanup instance stuffs job throttles retention enforcement via cache', function () { + Cache::forget('backup-retention-enforcement'); + + $job = new CleanupInstanceStuffsJob; + $job->handle(); + + // The cache key should now exist + expect(Cache::has('backup-retention-enforcement'))->toBeTrue(); + + // Running again should skip retention enforcement (cache hit) + // We verify by checking the lock is still held — no error thrown + $job->handle(); + + expect(Cache::has('backup-retention-enforcement'))->toBeTrue(); +});