Enhance container status tracking and improve user notifications (#7182)

This commit is contained in:
Andras Bacsai 2025-11-10 13:58:22 +01:00 committed by GitHub
commit e63a270fea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 215 additions and 5 deletions

View file

@ -28,6 +28,8 @@ class GetContainersStatus
protected ?Collection $applicationContainerStatuses;
protected ?Collection $applicationContainerRestartCounts;
public function handle(Server $server, ?Collection $containers = null, ?Collection $containerReplicates = null)
{
$this->containers = $containers;
@ -136,6 +138,18 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
if ($containerName) {
$this->applicationContainerStatuses->get($applicationId)->put($containerName, $containerStatus);
}
// Track restart counts for applications
$restartCount = data_get($container, 'RestartCount', 0);
if (! isset($this->applicationContainerRestartCounts)) {
$this->applicationContainerRestartCounts = collect();
}
if (! $this->applicationContainerRestartCounts->has($applicationId)) {
$this->applicationContainerRestartCounts->put($applicationId, collect());
}
if ($containerName) {
$this->applicationContainerRestartCounts->get($applicationId)->put($containerName, $restartCount);
}
} else {
// Notify user that this container should not be there.
}
@ -291,7 +305,24 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
continue;
}
$application->update(['status' => 'exited']);
// If container was recently restarting (crash loop), keep it as degraded for a grace period
// This prevents false "exited" status during the brief moment between container removal and recreation
$recentlyRestarted = $application->restart_count > 0 &&
$application->last_restart_at &&
$application->last_restart_at->greaterThan(now()->subSeconds(30));
if ($recentlyRestarted) {
// Keep it as degraded if it was recently in a crash loop
$application->update(['status' => 'degraded (unhealthy)']);
} else {
// Reset restart count when application exits completely
$application->update([
'status' => 'exited',
'restart_count' => 0,
'last_restart_at' => null,
'last_restart_type' => null,
]);
}
}
$notRunningApplicationPreviews = $previews->pluck('id')->diff($foundApplicationPreviews);
foreach ($notRunningApplicationPreviews as $previewId) {
@ -340,7 +371,37 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
continue;
}
$aggregatedStatus = $this->aggregateApplicationStatus($application, $containerStatuses);
// Track restart counts first
$maxRestartCount = 0;
if (isset($this->applicationContainerRestartCounts) && $this->applicationContainerRestartCounts->has($applicationId)) {
$containerRestartCounts = $this->applicationContainerRestartCounts->get($applicationId);
$maxRestartCount = $containerRestartCounts->max() ?? 0;
$previousRestartCount = $application->restart_count ?? 0;
if ($maxRestartCount > $previousRestartCount) {
// Restart count increased - this is a crash restart
$application->update([
'restart_count' => $maxRestartCount,
'last_restart_at' => now(),
'last_restart_type' => 'crash',
]);
// Send notification
$containerName = $application->name;
$projectUuid = data_get($application, 'environment.project.uuid');
$environmentName = data_get($application, 'environment.name');
$applicationUuid = data_get($application, 'uuid');
if ($projectUuid && $applicationUuid && $environmentName) {
$url = base_url().'/project/'.$projectUuid.'/'.$environmentName.'/application/'.$applicationUuid;
} else {
$url = null;
}
}
}
// Aggregate status after tracking restart counts
$aggregatedStatus = $this->aggregateApplicationStatus($application, $containerStatuses, $maxRestartCount);
if ($aggregatedStatus) {
$statusFromDb = $application->status;
if ($statusFromDb !== $aggregatedStatus) {
@ -355,7 +416,7 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
ServiceChecked::dispatch($this->server->team->id);
}
private function aggregateApplicationStatus($application, Collection $containerStatuses): ?string
private function aggregateApplicationStatus($application, Collection $containerStatuses, int $maxRestartCount = 0): ?string
{
// Parse docker compose to check for excluded containers
$dockerComposeRaw = data_get($application, 'docker_compose_raw');
@ -413,6 +474,11 @@ private function aggregateApplicationStatus($application, Collection $containerS
return 'degraded (unhealthy)';
}
// If container is exited but has restart count > 0, it's in a crash loop
if ($hasExited && $maxRestartCount > 0) {
return 'degraded (unhealthy)';
}
if ($hasRunning && $hasExited) {
return 'degraded (unhealthy)';
}
@ -421,7 +487,7 @@ private function aggregateApplicationStatus($application, Collection $containerS
return $hasUnhealthy ? 'running (unhealthy)' : 'running (healthy)';
}
// All containers are exited
// All containers are exited with no restart count - truly stopped
return 'exited (unhealthy)';
}
}

View file

@ -94,6 +94,14 @@ public function deploy(bool $force_rebuild = false)
return;
}
// Reset restart count on deployment
$this->application->update([
'restart_count' => 0,
'last_restart_at' => null,
'last_restart_type' => null,
]);
$this->setDeploymentUuid();
$result = queue_application_deployment(
application: $this->application,
@ -137,6 +145,14 @@ public function restart()
return;
}
// Reset restart count on manual restart
$this->application->update([
'restart_count' => 0,
'last_restart_at' => now(),
'last_restart_type' => 'manual',
]);
$this->setDeploymentUuid();
$result = queue_application_deployment(
application: $this->application,

View file

@ -121,6 +121,8 @@ class Application extends BaseModel
protected $casts = [
'http_basic_auth_password' => 'encrypted',
'restart_count' => 'integer',
'last_restart_at' => 'datetime',
];
protected static function booted()

View file

@ -0,0 +1,30 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Apply the migration.
     *
     * Adds three restart-tracking columns to the `applications` table:
     *  - `restart_count`     integer, defaults to 0
     *  - `last_restart_at`   nullable timestamp
     *  - `last_restart_type` nullable string, at most 10 characters
     *
     * NOTE(review): the `after()` placement hints are, per the Laravel
     * documentation, only honoured by MySQL/MariaDB - confirm whether the
     * target database respects them.
     */
    public function up(): void
    {
        Schema::table('applications', static function (Blueprint $applications): void {
            $applications
                ->integer('restart_count')
                ->default(0)
                ->after('status');

            $applications
                ->timestamp('last_restart_at')
                ->nullable()
                ->after('restart_count');

            $applications
                ->string('last_restart_type', 10)
                ->nullable()
                ->after('last_restart_at');
        });
    }

    /**
     * Roll the migration back.
     *
     * Drops the three columns that `up()` added to the `applications` table.
     */
    public function down(): void
    {
        $restartColumns = ['restart_count', 'last_restart_at', 'last_restart_type'];

        Schema::table('applications', static function (Blueprint $applications) use ($restartColumns): void {
            $applications->dropColumn($restartColumns);
        });
    }
};

View file

@ -12,6 +12,13 @@
@else
<x-status.stopped :status="$resource->status" />
@endif
@if (isset($resource->restart_count) && $resource->restart_count > 0 && !str($resource->status)->startsWith('exited'))
<div class="flex items-center pl-2">
<span class="text-xs dark:text-warning" title="Container has restarted {{ $resource->restart_count }} time{{ $resource->restart_count > 1 ? 's' : '' }}. Last restart: {{ $resource->last_restart_at?->diffForHumans() }}">
({{ $resource->restart_count }}x restarts)
</span>
</div>
@endif
@if (!str($resource->status)->contains('exited') && $showRefreshButton)
<button wire:loading.remove.delay.shortest wire:target="manualCheckStatus" title="Refresh Status" wire:click='manualCheckStatus'
class="mx-1 dark:hover:fill-white fill-black dark:fill-warning">

View file

@ -12,7 +12,14 @@
</a>
<a class="{{ request()->routeIs('project.application.logs') ? 'dark:text-white' : '' }}"
href="{{ route('project.application.logs', $parameters) }}">
Logs
<div class="flex items-center gap-1">
Logs
@if ($application->restart_count > 0 && !str($application->status)->startsWith('exited'))
<svg class="w-4 h-4 dark:text-warning" viewBox="0 0 24 24" fill="currentColor" xmlns="http://www.w3.org/2000/svg" title="Container has restarted {{ $application->restart_count }} time{{ $application->restart_count > 1 ? 's' : '' }}">
<path d="M12 2L1 21h22L12 2zm0 4l7.53 13H4.47L12 6zm-1 5v4h2v-4h-2zm0 5v2h2v-2h-2z"/>
</svg>
@endif
</div>
</a>
@if (!$application->destination->server->isSwarm())
@can('canAccessTerminal')

View file

@ -0,0 +1,82 @@
<?php
use App\Models\Application;
use App\Models\Server;
// Shared fixtures: a mocked Server and a mocked Application, stored on the test case.
// NOTE(review): none of the tests visible in this file read $this->server or
// $this->application - confirm whether these mocks are still needed.
beforeEach(function () {
    // Server double: reports itself as functional, non-swarm, with no applications.
    $this->server = Mockery::mock(Server::class);

    $serverFlags = [
        'isFunctional' => true,
        'isSwarm' => false,
    ];
    foreach ($serverFlags as $method => $result) {
        $this->server->shouldReceive($method)->andReturn($result);
    }
    $this->server->shouldReceive('applications')->andReturn(collect());

    // Application double: attribute reads are answered from a fixed map.
    $this->application = Mockery::mock(Application::class);

    $applicationAttributes = [
        'id' => 1,
        'name' => 'test-app',
        'restart_count' => 0,
        'uuid' => 'test-uuid',
        'environment' => null,
    ];
    foreach ($applicationAttributes as $attribute => $value) {
        $this->application
            ->shouldReceive('getAttribute')
            ->with($attribute)
            ->andReturn($value);
    }
});
// Reading `RestartCount` from an inspect-style array yields the stored integer.
it('extracts restart count from container data', function () {
    $state = [
        'Status' => 'running',
        'Health' => ['Status' => 'healthy'],
    ];

    $labels = [
        'coolify.applicationId' => '1',
        'com.docker.compose.service' => 'web',
    ];

    $inspectPayload = [
        'RestartCount' => 5,
        'State' => $state,
        'Config' => ['Labels' => $labels],
    ];

    expect(data_get($inspectPayload, 'RestartCount', 0))->toBe(5);
});
// When the payload has no `RestartCount` key, the supplied default (0) is returned.
it('defaults to zero when restart count is missing', function () {
    $inspectPayload = [
        'State' => ['Status' => 'running'],
        'Config' => ['Labels' => []],
    ];

    $fallbackCount = data_get($inspectPayload, 'RestartCount', 0);

    expect($fallbackCount)->toBe(0);
});
// Compares two literal counts only; no application code is exercised here.
it('detects restart count increase', function () {
    $storedCount = 2;
    $observedCount = 5;

    expect($observedCount)->toBeGreaterThan($storedCount);
});
// The highest per-container restart count is what max() reports for the collection.
it('identifies maximum restart count from multiple containers', function () {
    $countsByContainer = [
        'web' => 3,
        'worker' => 5,
        'scheduler' => 1,
    ];

    $highest = collect($countsByContainer)->max();

    expect($highest)->toBe(5);
});
// max() on an empty collection is null, so the `?? 0` fallback supplies zero.
it('handles empty restart counts collection', function () {
    $noCounts = collect([]);

    expect($noCounts->max() ?? 0)->toBe(0);
});