Enhance container status tracking and improve user notifications (#7182)
This commit is contained in:
commit
e63a270fea
7 changed files with 215 additions and 5 deletions
|
|
@ -28,6 +28,8 @@ class GetContainersStatus
|
|||
|
||||
protected ?Collection $applicationContainerStatuses;
|
||||
|
||||
protected ?Collection $applicationContainerRestartCounts;
|
||||
|
||||
public function handle(Server $server, ?Collection $containers = null, ?Collection $containerReplicates = null)
|
||||
{
|
||||
$this->containers = $containers;
|
||||
|
|
@ -136,6 +138,18 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
|
|||
if ($containerName) {
|
||||
$this->applicationContainerStatuses->get($applicationId)->put($containerName, $containerStatus);
|
||||
}
|
||||
|
||||
// Track restart counts for applications
|
||||
$restartCount = data_get($container, 'RestartCount', 0);
|
||||
if (! isset($this->applicationContainerRestartCounts)) {
|
||||
$this->applicationContainerRestartCounts = collect();
|
||||
}
|
||||
if (! $this->applicationContainerRestartCounts->has($applicationId)) {
|
||||
$this->applicationContainerRestartCounts->put($applicationId, collect());
|
||||
}
|
||||
if ($containerName) {
|
||||
$this->applicationContainerRestartCounts->get($applicationId)->put($containerName, $restartCount);
|
||||
}
|
||||
} else {
|
||||
// Notify user that this container should not be there.
|
||||
}
|
||||
|
|
@ -291,7 +305,24 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
|
|||
continue;
|
||||
}
|
||||
|
||||
$application->update(['status' => 'exited']);
|
||||
// If container was recently restarting (crash loop), keep it as degraded for a grace period
|
||||
// This prevents false "exited" status during the brief moment between container removal and recreation
|
||||
$recentlyRestarted = $application->restart_count > 0 &&
|
||||
$application->last_restart_at &&
|
||||
$application->last_restart_at->greaterThan(now()->subSeconds(30));
|
||||
|
||||
if ($recentlyRestarted) {
|
||||
// Keep it as degraded if it was recently in a crash loop
|
||||
$application->update(['status' => 'degraded (unhealthy)']);
|
||||
} else {
|
||||
// Reset restart count when application exits completely
|
||||
$application->update([
|
||||
'status' => 'exited',
|
||||
'restart_count' => 0,
|
||||
'last_restart_at' => null,
|
||||
'last_restart_type' => null,
|
||||
]);
|
||||
}
|
||||
}
|
||||
$notRunningApplicationPreviews = $previews->pluck('id')->diff($foundApplicationPreviews);
|
||||
foreach ($notRunningApplicationPreviews as $previewId) {
|
||||
|
|
@ -340,7 +371,37 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
|
|||
continue;
|
||||
}
|
||||
|
||||
$aggregatedStatus = $this->aggregateApplicationStatus($application, $containerStatuses);
|
||||
// Track restart counts first
|
||||
$maxRestartCount = 0;
|
||||
if (isset($this->applicationContainerRestartCounts) && $this->applicationContainerRestartCounts->has($applicationId)) {
|
||||
$containerRestartCounts = $this->applicationContainerRestartCounts->get($applicationId);
|
||||
$maxRestartCount = $containerRestartCounts->max() ?? 0;
|
||||
$previousRestartCount = $application->restart_count ?? 0;
|
||||
|
||||
if ($maxRestartCount > $previousRestartCount) {
|
||||
// Restart count increased - this is a crash restart
|
||||
$application->update([
|
||||
'restart_count' => $maxRestartCount,
|
||||
'last_restart_at' => now(),
|
||||
'last_restart_type' => 'crash',
|
||||
]);
|
||||
|
||||
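// Prepare notification context (display name and dashboard URL). NOTE(review): nothing in this hunk actually dispatches a notification — confirm where it is sent.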
|
||||
$containerName = $application->name;
|
||||
$projectUuid = data_get($application, 'environment.project.uuid');
|
||||
$environmentName = data_get($application, 'environment.name');
|
||||
$applicationUuid = data_get($application, 'uuid');
|
||||
|
||||
if ($projectUuid && $applicationUuid && $environmentName) {
|
||||
$url = base_url().'/project/'.$projectUuid.'/'.$environmentName.'/application/'.$applicationUuid;
|
||||
} else {
|
||||
$url = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate status after tracking restart counts
|
||||
$aggregatedStatus = $this->aggregateApplicationStatus($application, $containerStatuses, $maxRestartCount);
|
||||
if ($aggregatedStatus) {
|
||||
$statusFromDb = $application->status;
|
||||
if ($statusFromDb !== $aggregatedStatus) {
|
||||
|
|
@ -355,7 +416,7 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
|
|||
ServiceChecked::dispatch($this->server->team->id);
|
||||
}
|
||||
|
||||
private function aggregateApplicationStatus($application, Collection $containerStatuses): ?string
|
||||
private function aggregateApplicationStatus($application, Collection $containerStatuses, int $maxRestartCount = 0): ?string
|
||||
{
|
||||
// Parse docker compose to check for excluded containers
|
||||
$dockerComposeRaw = data_get($application, 'docker_compose_raw');
|
||||
|
|
@ -413,6 +474,11 @@ private function aggregateApplicationStatus($application, Collection $containerS
|
|||
return 'degraded (unhealthy)';
|
||||
}
|
||||
|
||||
// If container is exited but has restart count > 0, it's in a crash loop
|
||||
if ($hasExited && $maxRestartCount > 0) {
|
||||
return 'degraded (unhealthy)';
|
||||
}
|
||||
|
||||
if ($hasRunning && $hasExited) {
|
||||
return 'degraded (unhealthy)';
|
||||
}
|
||||
|
|
@ -421,7 +487,7 @@ private function aggregateApplicationStatus($application, Collection $containerS
|
|||
return $hasUnhealthy ? 'running (unhealthy)' : 'running (healthy)';
|
||||
}
|
||||
|
||||
// All containers are exited
|
||||
// All containers are exited with no restart count - truly stopped
|
||||
return 'exited (unhealthy)';
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,6 +94,14 @@ public function deploy(bool $force_rebuild = false)
|
|||
|
||||
return;
|
||||
}
|
||||
|
||||
// Reset restart count on deployment
|
||||
$this->application->update([
|
||||
'restart_count' => 0,
|
||||
'last_restart_at' => null,
|
||||
'last_restart_type' => null,
|
||||
]);
|
||||
|
||||
$this->setDeploymentUuid();
|
||||
$result = queue_application_deployment(
|
||||
application: $this->application,
|
||||
|
|
@ -137,6 +145,14 @@ public function restart()
|
|||
|
||||
return;
|
||||
}
|
||||
|
||||
// Reset restart count on manual restart
|
||||
$this->application->update([
|
||||
'restart_count' => 0,
|
||||
'last_restart_at' => now(),
|
||||
'last_restart_type' => 'manual',
|
||||
]);
|
||||
|
||||
$this->setDeploymentUuid();
|
||||
$result = queue_application_deployment(
|
||||
application: $this->application,
|
||||
|
|
|
|||
|
|
@ -121,6 +121,8 @@ class Application extends BaseModel
|
|||
|
||||
protected $casts = [
|
||||
'http_basic_auth_password' => 'encrypted',
|
||||
'restart_count' => 'integer',
|
||||
'last_restart_at' => 'datetime',
|
||||
];
|
||||
|
||||
protected static function booted()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,30 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Add the restart-tracking columns to the `applications` table.
     *
     * Columns are chained one after another, starting after `status`:
     *  - restart_count     integer, defaults to 0
     *  - last_restart_at   nullable timestamp
     *  - last_restart_type nullable string, at most 10 characters
     */
    public function up(): void
    {
        Schema::table('applications', function (Blueprint $applications) {
            $applications->integer('restart_count')
                ->default(0)
                ->after('status');

            $applications->timestamp('last_restart_at')
                ->nullable()
                ->after('restart_count');

            $applications->string('last_restart_type', 10)
                ->nullable()
                ->after('last_restart_at');
        });
    }

    /**
     * Drop the restart-tracking columns added by up().
     */
    public function down(): void
    {
        $restartColumns = ['restart_count', 'last_restart_at', 'last_restart_type'];

        Schema::table('applications', function (Blueprint $applications) use ($restartColumns) {
            $applications->dropColumn($restartColumns);
        });
    }
};
|
||||
|
|
@ -12,6 +12,13 @@
|
|||
@else
|
||||
<x-status.stopped :status="$resource->status" />
|
||||
@endif
|
||||
@if (isset($resource->restart_count) && $resource->restart_count > 0 && !str($resource->status)->startsWith('exited'))
|
||||
<div class="flex items-center pl-2">
|
||||
<span class="text-xs dark:text-warning" title="Container has restarted {{ $resource->restart_count }} time{{ $resource->restart_count > 1 ? 's' : '' }}. Last restart: {{ $resource->last_restart_at?->diffForHumans() }}">
|
||||
({{ $resource->restart_count }}x restarts)
|
||||
</span>
|
||||
</div>
|
||||
@endif
|
||||
@if (!str($resource->status)->contains('exited') && $showRefreshButton)
|
||||
<button wire:loading.remove.delay.shortest wire:target="manualCheckStatus" title="Refresh Status" wire:click='manualCheckStatus'
|
||||
class="mx-1 dark:hover:fill-white fill-black dark:fill-warning">
|
||||
|
|
|
|||
|
|
@ -12,7 +12,14 @@
|
|||
</a>
|
||||
<a class="{{ request()->routeIs('project.application.logs') ? 'dark:text-white' : '' }}"
|
||||
href="{{ route('project.application.logs', $parameters) }}">
|
||||
Logs
|
||||
<div class="flex items-center gap-1">
|
||||
Logs
|
||||
@if ($application->restart_count > 0 && !str($application->status)->startsWith('exited'))
|
||||
<svg class="w-4 h-4 dark:text-warning" viewBox="0 0 24 24" fill="currentColor" xmlns="http://www.w3.org/2000/svg" title="Container has restarted {{ $application->restart_count }} time{{ $application->restart_count > 1 ? 's' : '' }}">
|
||||
<path d="M12 2L1 21h22L12 2zm0 4l7.53 13H4.47L12 6zm-1 5v4h2v-4h-2zm0 5v2h2v-2h-2z"/>
|
||||
</svg>
|
||||
@endif
|
||||
</div>
|
||||
</a>
|
||||
@if (!$application->destination->server->isSwarm())
|
||||
@can('canAccessTerminal')
|
||||
|
|
|
|||
82
tests/Unit/RestartCountTrackingTest.php
Normal file
82
tests/Unit/RestartCountTrackingTest.php
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
<?php
|
||||
|
||||
use App\Models\Application;
|
||||
use App\Models\Server;
|
||||
|
||||
// Shared fixtures: a Server mock and an Application mock with canned answers.
// NOTE(review): the tests visible alongside this hook do not appear to read
// $this->server or $this->application — confirm whether this setup is still needed.
beforeEach(function () {
    // Server double: functional, non-swarm, with no applications attached.
    $serverMock = Mockery::mock(Server::class);
    $serverMock->shouldReceive('isFunctional')->andReturn(true);
    $serverMock->shouldReceive('isSwarm')->andReturn(false);
    $serverMock->shouldReceive('applications')->andReturn(collect());
    $this->server = $serverMock;

    // Application double: each attribute lookup returns a fixed value.
    $applicationMock = Mockery::mock(Application::class);
    $cannedAttributes = [
        'id' => 1,
        'name' => 'test-app',
        'restart_count' => 0,
        'uuid' => 'test-uuid',
        'environment' => null,
    ];
    foreach ($cannedAttributes as $attribute => $value) {
        $applicationMock->shouldReceive('getAttribute')->with($attribute)->andReturn($value);
    }
    $this->application = $applicationMock;
});
|
||||
|
||||
it('extracts restart count from container data', function () {
    // Sample container payload carrying a top-level RestartCount of 5.
    $labels = [
        'coolify.applicationId' => '1',
        'com.docker.compose.service' => 'web',
    ];
    $payload = [
        'RestartCount' => 5,
        'State' => [
            'Status' => 'running',
            'Health' => ['Status' => 'healthy'],
        ],
        'Config' => [
            'Labels' => $labels,
        ],
    ];

    // The lookup must return the stored integer, not the fallback.
    expect(data_get($payload, 'RestartCount', 0))->toBe(5);
});
|
||||
|
||||
it('defaults to zero when restart count is missing', function () {
    // Payload deliberately has no RestartCount key.
    $payload = [
        'State' => ['Status' => 'running'],
        'Config' => ['Labels' => []],
    ];

    // With the key absent, data_get() must fall back to the supplied default.
    $resolved = data_get($payload, 'RestartCount', 0);

    expect($resolved)->toBe(0);
});
|
||||
|
||||
it('detects restart count increase', function () {
    // Stored count versus freshly observed count.
    [$storedCount, $observedCount] = [2, 5];

    expect($observedCount)->toBeGreaterThan($storedCount);
});
|
||||
|
||||
it('identifies maximum restart count from multiple containers', function () {
    // Per-container restart counts keyed by container name.
    $countsByContainer = [
        'web' => 3,
        'worker' => 5,
        'scheduler' => 1,
    ];

    // The highest per-container value is the one expected back.
    expect(collect($countsByContainer)->max())->toBe(5);
});
|
||||
|
||||
it('handles empty restart counts collection', function () {
    // max() on an empty collection yields null, so the null-coalesce supplies 0.
    $highest = collect([])->max() ?? 0;

    expect($highest)->toBe(0);
});
|
||||
Loading…
Reference in a new issue