fix(backups): enforce retention and clean up stale executions
Add `WithoutOverlapping` middleware to `DatabaseBackupJob` keyed by backup ID with timeout-based lock expiry to prevent concurrent runs. Mark long-running backup executions as failed when they exceed the stale time threshold, and add periodic retention enforcement in `CleanupInstanceStuffsJob` with cache-based throttling. Also add float casts for retention max-storage fields on `ScheduledDatabaseBackup` and comprehensive feature tests covering overlap middleware, stale detection, casts, and retention behavior.
This commit is contained in:
parent
968508583d
commit
ffb5045c6a
4 changed files with 319 additions and 0 deletions
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Models\ScheduledDatabaseBackup;
|
||||
use App\Models\TeamInvitation;
|
||||
use App\Models\User;
|
||||
use Illuminate\Bus\Queueable;
|
||||
|
|
@ -12,6 +13,7 @@
|
|||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\Middleware\WithoutOverlapping;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
class CleanupInstanceStuffsJob implements ShouldBeEncrypted, ShouldBeUnique, ShouldQueue
|
||||
|
|
@ -32,6 +34,7 @@ public function handle(): void
|
|||
try {
|
||||
$this->cleanupInvitationLink();
|
||||
$this->cleanupExpiredEmailChangeRequests();
|
||||
$this->enforceBackupRetention();
|
||||
} catch (\Throwable $e) {
|
||||
Log::error('CleanupInstanceStuffsJob failed with error: '.$e->getMessage());
|
||||
}
|
||||
|
|
@ -55,4 +58,25 @@ private function cleanupExpiredEmailChangeRequests()
|
|||
'email_change_code_expires_at' => null,
|
||||
]);
|
||||
}
|
||||
|
||||
private function enforceBackupRetention(): void
|
||||
{
|
||||
if (! Cache::add('backup-retention-enforcement', true, 1800)) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$backups = ScheduledDatabaseBackup::where('enabled', true)->get();
|
||||
foreach ($backups as $backup) {
|
||||
try {
|
||||
removeOldBackups($backup);
|
||||
} catch (\Throwable $e) {
|
||||
Log::warning('Failed to enforce retention for backup '.$backup->id.': '.$e->getMessage());
|
||||
}
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
Log::error('Failed to enforce backup retention: '.$e->getMessage());
|
||||
Cache::forget('backup-retention-enforcement');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\Middleware\WithoutOverlapping;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Str;
|
||||
|
|
@ -80,6 +81,13 @@ public function __construct(public ScheduledDatabaseBackup $backup)
|
|||
$this->timeout = $backup->timeout ?? 3600;
|
||||
}
|
||||
|
||||
public function middleware(): array
|
||||
{
|
||||
$expireAfter = ($this->backup->timeout ?? 3600) + 300;
|
||||
|
||||
return [(new WithoutOverlapping('database-backup-'.$this->backup->id))->expireAfter($expireAfter)->dontRelease()];
|
||||
}
|
||||
|
||||
public function handle(): void
|
||||
{
|
||||
try {
|
||||
|
|
@ -107,6 +115,8 @@ public function handle(): void
|
|||
throw new \Exception('Database not found?!');
|
||||
}
|
||||
|
||||
$this->markStaleExecutionsAsFailed();
|
||||
|
||||
BackupCreated::dispatch($this->team->id);
|
||||
|
||||
$status = str(data_get($this->database, 'status'));
|
||||
|
|
@ -727,6 +737,31 @@ private function getFullImageName(): string
|
|||
return "{$helperImage}:{$latestVersion}";
|
||||
}
|
||||
|
||||
private function markStaleExecutionsAsFailed(): void
|
||||
{
|
||||
try {
|
||||
$timeoutSeconds = ($this->backup->timeout ?? 3600) * 2;
|
||||
|
||||
$staleExecutions = $this->backup->executions()
|
||||
->where('status', 'running')
|
||||
->where('created_at', '<', now()->subSeconds($timeoutSeconds))
|
||||
->get();
|
||||
|
||||
foreach ($staleExecutions as $execution) {
|
||||
$execution->update([
|
||||
'status' => 'failed',
|
||||
'message' => 'Marked as failed - backup execution exceeded maximum allowed time',
|
||||
'finished_at' => now(),
|
||||
]);
|
||||
}
|
||||
} catch (Throwable $e) {
|
||||
Log::channel('scheduled-errors')->warning('Failed to clean up stale backup executions', [
|
||||
'backup_id' => $this->backup->uuid,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
public function failed(?Throwable $exception): void
|
||||
{
|
||||
Log::channel('scheduled-errors')->error('DatabaseBackup permanently failed', [
|
||||
|
|
|
|||
|
|
@ -8,6 +8,14 @@
|
|||
|
||||
class ScheduledDatabaseBackup extends BaseModel
|
||||
{
|
||||
protected function casts(): array
|
||||
{
|
||||
return [
|
||||
'database_backup_retention_max_storage_locally' => 'float',
|
||||
'database_backup_retention_max_storage_s3' => 'float',
|
||||
];
|
||||
}
|
||||
|
||||
protected $fillable = [
|
||||
'uuid',
|
||||
'team_id',
|
||||
|
|
|
|||
252
tests/Feature/BackupRetentionAndStaleDetectionTest.php
Normal file
252
tests/Feature/BackupRetentionAndStaleDetectionTest.php
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
<?php
|
||||
|
||||
use App\Jobs\CleanupInstanceStuffsJob;
|
||||
use App\Jobs\DatabaseBackupJob;
|
||||
use App\Models\ScheduledDatabaseBackup;
|
||||
use App\Models\ScheduledDatabaseBackupExecution;
|
||||
use App\Models\Team;
|
||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Illuminate\Queue\Middleware\WithoutOverlapping;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
|
||||
uses(RefreshDatabase::class);
|
||||
|
||||
// --- WithoutOverlapping middleware on DatabaseBackupJob ---
|
||||
|
||||
test('database backup job has WithoutOverlapping middleware keyed by backup id', function () {
|
||||
$team = Team::factory()->create();
|
||||
|
||||
$backup = ScheduledDatabaseBackup::create([
|
||||
'frequency' => '0 0 * * *',
|
||||
'save_s3' => false,
|
||||
'database_type' => 'App\Models\StandalonePostgresql',
|
||||
'database_id' => 1,
|
||||
'team_id' => $team->id,
|
||||
]);
|
||||
|
||||
$job = new DatabaseBackupJob($backup);
|
||||
$middleware = $job->middleware();
|
||||
|
||||
expect($middleware)->toHaveCount(1);
|
||||
expect($middleware[0])->toBeInstanceOf(WithoutOverlapping::class);
|
||||
});
|
||||
|
||||
test('database backup job middleware uses custom timeout plus buffer', function () {
|
||||
$team = Team::factory()->create();
|
||||
|
||||
$backup = ScheduledDatabaseBackup::create([
|
||||
'frequency' => '0 0 * * *',
|
||||
'save_s3' => false,
|
||||
'database_type' => 'App\Models\StandalonePostgresql',
|
||||
'database_id' => 1,
|
||||
'team_id' => $team->id,
|
||||
'timeout' => 7200,
|
||||
]);
|
||||
|
||||
$job = new DatabaseBackupJob($backup);
|
||||
$middleware = $job->middleware();
|
||||
|
||||
$reflection = new ReflectionClass($middleware[0]);
|
||||
$expiresAfterProp = $reflection->getProperty('expiresAfter');
|
||||
$releaseAfterProp = $reflection->getProperty('releaseAfter');
|
||||
|
||||
expect($expiresAfterProp->getValue($middleware[0]))->toBe(7500); // 7200 + 300
|
||||
expect($releaseAfterProp->getValue($middleware[0]))->toBeNull(); // dontRelease sets to null
|
||||
});
|
||||
|
||||
// --- Stale backup detection in DatabaseBackupJob ---
|
||||
|
||||
test('markStaleExecutionsAsFailed marks stale running executions as failed', function () {
|
||||
$team = Team::factory()->create();
|
||||
|
||||
$backup = ScheduledDatabaseBackup::create([
|
||||
'frequency' => '0 0 * * *',
|
||||
'save_s3' => false,
|
||||
'database_type' => 'App\Models\StandalonePostgresql',
|
||||
'database_id' => 1,
|
||||
'team_id' => $team->id,
|
||||
'timeout' => 3600,
|
||||
]);
|
||||
|
||||
// Create a stale execution (3 hours old, timeout is 1h, threshold is 2h)
|
||||
$staleExecution = ScheduledDatabaseBackupExecution::create([
|
||||
'uuid' => 'stale-exec-uuid',
|
||||
'database_name' => 'test_db',
|
||||
'scheduled_database_backup_id' => $backup->id,
|
||||
'status' => 'running',
|
||||
]);
|
||||
ScheduledDatabaseBackupExecution::where('id', $staleExecution->id)
|
||||
->update(['created_at' => now()->subHours(3)]);
|
||||
|
||||
// Call the private method via reflection
|
||||
$job = new DatabaseBackupJob($backup);
|
||||
$reflection = new ReflectionClass($job);
|
||||
$method = $reflection->getMethod('markStaleExecutionsAsFailed');
|
||||
$method->invoke($job);
|
||||
|
||||
$staleExecution->refresh();
|
||||
expect($staleExecution->status)->toBe('failed');
|
||||
expect($staleExecution->message)->toContain('exceeded maximum allowed time');
|
||||
expect($staleExecution->finished_at)->not->toBeNull();
|
||||
});
|
||||
|
||||
test('markStaleExecutionsAsFailed does not mark recent running executions as failed', function () {
|
||||
$team = Team::factory()->create();
|
||||
|
||||
$backup = ScheduledDatabaseBackup::create([
|
||||
'frequency' => '0 0 * * *',
|
||||
'save_s3' => false,
|
||||
'database_type' => 'App\Models\StandalonePostgresql',
|
||||
'database_id' => 1,
|
||||
'team_id' => $team->id,
|
||||
'timeout' => 3600,
|
||||
]);
|
||||
|
||||
// Create a recent execution (30 minutes old, threshold is 2 hours)
|
||||
$recentExecution = ScheduledDatabaseBackupExecution::create([
|
||||
'uuid' => 'recent-exec-uuid',
|
||||
'database_name' => 'test_db',
|
||||
'scheduled_database_backup_id' => $backup->id,
|
||||
'status' => 'running',
|
||||
]);
|
||||
ScheduledDatabaseBackupExecution::where('id', $recentExecution->id)
|
||||
->update(['created_at' => now()->subMinutes(30)]);
|
||||
|
||||
$job = new DatabaseBackupJob($backup);
|
||||
$reflection = new ReflectionClass($job);
|
||||
$method = $reflection->getMethod('markStaleExecutionsAsFailed');
|
||||
$method->invoke($job);
|
||||
|
||||
$recentExecution->refresh();
|
||||
expect($recentExecution->status)->toBe('running');
|
||||
expect($recentExecution->finished_at)->toBeNull();
|
||||
});
|
||||
|
||||
// --- Decimal cast on ScheduledDatabaseBackup model ---
|
||||
|
||||
test('scheduled database backup model casts max storage fields to float', function () {
|
||||
$model = new ScheduledDatabaseBackup;
|
||||
$casts = $model->getCasts();
|
||||
|
||||
expect($casts)->toHaveKey('database_backup_retention_max_storage_locally');
|
||||
expect($casts['database_backup_retention_max_storage_locally'])->toBe('float');
|
||||
expect($casts)->toHaveKey('database_backup_retention_max_storage_s3');
|
||||
expect($casts['database_backup_retention_max_storage_s3'])->toBe('float');
|
||||
});
|
||||
|
||||
test('max storage retention field returns float zero that passes strict comparison', function () {
|
||||
$team = Team::factory()->create();
|
||||
|
||||
$backup = ScheduledDatabaseBackup::create([
|
||||
'frequency' => '0 0 * * *',
|
||||
'save_s3' => false,
|
||||
'database_type' => 'App\Models\StandalonePostgresql',
|
||||
'database_id' => 1,
|
||||
'team_id' => $team->id,
|
||||
'database_backup_retention_max_storage_locally' => 0,
|
||||
]);
|
||||
|
||||
$backup->refresh();
|
||||
|
||||
// With the float cast, strict comparison to 0.0 works
|
||||
expect($backup->database_backup_retention_max_storage_locally)->toBe(0.0);
|
||||
// And loose comparison to int 0 also works
|
||||
expect($backup->database_backup_retention_max_storage_locally == 0)->toBeTrue();
|
||||
});
|
||||
|
||||
// --- Periodic retention enforcement in CleanupInstanceStuffsJob ---
|
||||
|
||||
test('cleanup instance stuffs job calls retention enforcement without errors', function () {
|
||||
$team = Team::factory()->create();
|
||||
|
||||
$backup = ScheduledDatabaseBackup::create([
|
||||
'frequency' => '0 0 * * *',
|
||||
'save_s3' => false,
|
||||
'enabled' => true,
|
||||
'database_type' => 'App\Models\StandalonePostgresql',
|
||||
'database_id' => 1,
|
||||
'team_id' => $team->id,
|
||||
'database_backup_retention_amount_locally' => 1,
|
||||
]);
|
||||
|
||||
// Create 3 successful executions
|
||||
foreach (range(1, 3) as $i) {
|
||||
ScheduledDatabaseBackupExecution::create([
|
||||
'uuid' => "exec-uuid-{$i}",
|
||||
'database_name' => 'test_db',
|
||||
'filename' => "/backup/test_{$i}.dmp",
|
||||
'scheduled_database_backup_id' => $backup->id,
|
||||
'status' => 'success',
|
||||
'size' => 1024,
|
||||
'local_storage_deleted' => false,
|
||||
'created_at' => now()->subDays($i),
|
||||
]);
|
||||
}
|
||||
|
||||
Cache::forget('backup-retention-enforcement');
|
||||
|
||||
// Should not throw — the job gracefully handles missing servers
|
||||
$job = new CleanupInstanceStuffsJob;
|
||||
$job->handle();
|
||||
|
||||
// All 3 executions should still exist (no server to delete files from)
|
||||
expect($backup->executions()->count())->toBe(3);
|
||||
});
|
||||
|
||||
test('deleteOldBackupsLocally identifies correct backups for deletion by retention count', function () {
|
||||
$team = Team::factory()->create();
|
||||
|
||||
$backup = ScheduledDatabaseBackup::create([
|
||||
'frequency' => '0 0 * * *',
|
||||
'save_s3' => false,
|
||||
'enabled' => true,
|
||||
'database_type' => 'App\Models\StandalonePostgresql',
|
||||
'database_id' => 1,
|
||||
'team_id' => $team->id,
|
||||
'database_backup_retention_amount_locally' => 1,
|
||||
]);
|
||||
|
||||
foreach (range(1, 3) as $i) {
|
||||
$exec = ScheduledDatabaseBackupExecution::create([
|
||||
'uuid' => "exec-uuid-{$i}",
|
||||
'database_name' => 'test_db',
|
||||
'filename' => "/backup/test_{$i}.dmp",
|
||||
'scheduled_database_backup_id' => $backup->id,
|
||||
'status' => 'success',
|
||||
'size' => 1024,
|
||||
'local_storage_deleted' => false,
|
||||
]);
|
||||
ScheduledDatabaseBackupExecution::where('id', $exec->id)
|
||||
->update(['created_at' => now()->subDays($i)]);
|
||||
}
|
||||
|
||||
// Test the selection logic directly
|
||||
$successfulBackups = $backup->executions()
|
||||
->where('status', 'success')
|
||||
->where('local_storage_deleted', false)
|
||||
->orderBy('created_at', 'desc')
|
||||
->get();
|
||||
|
||||
$retentionAmount = $backup->database_backup_retention_amount_locally;
|
||||
$backupsToDelete = $successfulBackups->skip($retentionAmount);
|
||||
|
||||
// exec-uuid-1 is newest (1 day ago), exec-uuid-2 and exec-uuid-3 should be deleted
|
||||
expect($backupsToDelete)->toHaveCount(2);
|
||||
expect($successfulBackups->first()->uuid)->toBe('exec-uuid-1');
|
||||
});
|
||||
|
||||
test('cleanup instance stuffs job throttles retention enforcement via cache', function () {
|
||||
Cache::forget('backup-retention-enforcement');
|
||||
|
||||
$job = new CleanupInstanceStuffsJob;
|
||||
$job->handle();
|
||||
|
||||
// The cache key should now exist
|
||||
expect(Cache::has('backup-retention-enforcement'))->toBeTrue();
|
||||
|
||||
// Running again should skip retention enforcement (cache hit)
|
||||
// We verify by checking the lock is still held — no error thrown
|
||||
$job->handle();
|
||||
|
||||
expect(Cache::has('backup-retention-enforcement'))->toBeTrue();
|
||||
});
|
||||
Loading…
Reference in a new issue