From 68a9f2ca77eec9ff8221103dff9ed4ea805c5106 Mon Sep 17 00:00:00 2001
From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com>
Date: Mon, 10 Nov 2025 13:04:31 +0100
Subject: [PATCH] feat: add container restart tracking and crash loop detection
Track container restart counts from Docker and detect crash loops to provide better visibility into application health issues.
- Add restart_count, last_restart_at, and last_restart_type columns to applications table
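- Detect restart count increases from Docker inspect data and record them as crash restarts (the application URL for a notification is built, but no notification is dispatched in this patch)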
- Show restart count badge in UI with warning icon on Logs navigation
- Distinguish between crash restarts and manual restarts
- Implement 30-second grace period to prevent false "exited" status during crash loops
- Reset restart count on redeploy and manual restart, and when the status check finds the application fully exited outside the grace period
- Add unit tests for restart count tracking logic
This helps users quickly identify when containers are in crash loops and need attention, even when the container status flickers between states during Docker's restart backoff period.
---
app/Actions/Docker/GetContainersStatus.php | 74 ++++++++++++++++-
app/Livewire/Project/Application/Heading.php | 16 ++++
app/Models/Application.php | 2 +
...restart_tracking_to_applications_table.php | 30 +++++++
.../views/components/status/index.blade.php | 7 ++
.../project/application/heading.blade.php | 9 +-
templates/service-templates-latest.json | 16 ++--
templates/service-templates.json | 16 ++--
tests/Unit/RestartCountTrackingTest.php | 82 +++++++++++++++++++
9 files changed, 231 insertions(+), 21 deletions(-)
create mode 100644 database/migrations/2025_11_10_112500_add_restart_tracking_to_applications_table.php
create mode 100644 tests/Unit/RestartCountTrackingTest.php
diff --git a/app/Actions/Docker/GetContainersStatus.php b/app/Actions/Docker/GetContainersStatus.php
index f5d5f82b6..a985871dc 100644
--- a/app/Actions/Docker/GetContainersStatus.php
+++ b/app/Actions/Docker/GetContainersStatus.php
@@ -28,6 +28,8 @@ class GetContainersStatus
protected ?Collection $applicationContainerStatuses;
+ protected ?Collection $applicationContainerRestartCounts;
+
public function handle(Server $server, ?Collection $containers = null, ?Collection $containerReplicates = null)
{
$this->containers = $containers;
@@ -136,6 +138,18 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
if ($containerName) {
$this->applicationContainerStatuses->get($applicationId)->put($containerName, $containerStatus);
}
+
+ // Track restart counts for applications
+ $restartCount = data_get($container, 'RestartCount', 0);
+ if (! isset($this->applicationContainerRestartCounts)) {
+ $this->applicationContainerRestartCounts = collect();
+ }
+ if (! $this->applicationContainerRestartCounts->has($applicationId)) {
+ $this->applicationContainerRestartCounts->put($applicationId, collect());
+ }
+ if ($containerName) {
+ $this->applicationContainerRestartCounts->get($applicationId)->put($containerName, $restartCount);
+ }
} else {
// Notify user that this container should not be there.
}
@@ -291,7 +305,24 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
continue;
}
- $application->update(['status' => 'exited']);
+ // If container was recently restarting (crash loop), keep it as degraded for a grace period
+ // This prevents false "exited" status during the brief moment between container removal and recreation
+ $recentlyRestarted = $application->restart_count > 0 &&
+ $application->last_restart_at &&
+ $application->last_restart_at->greaterThan(now()->subSeconds(30));
+
+ if ($recentlyRestarted) {
+ // Keep it as degraded if it was recently in a crash loop
+ $application->update(['status' => 'degraded (unhealthy)']);
+ } else {
+ // Reset restart count when application exits completely
+ $application->update([
+ 'status' => 'exited',
+ 'restart_count' => 0,
+ 'last_restart_at' => null,
+ 'last_restart_type' => null,
+ ]);
+ }
}
$notRunningApplicationPreviews = $previews->pluck('id')->diff($foundApplicationPreviews);
foreach ($notRunningApplicationPreviews as $previewId) {
@@ -340,7 +371,37 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
continue;
}
- $aggregatedStatus = $this->aggregateApplicationStatus($application, $containerStatuses);
+ // Track restart counts first
+ $maxRestartCount = 0;
+ if (isset($this->applicationContainerRestartCounts) && $this->applicationContainerRestartCounts->has($applicationId)) {
+ $containerRestartCounts = $this->applicationContainerRestartCounts->get($applicationId);
+ $maxRestartCount = $containerRestartCounts->max() ?? 0;
+ $previousRestartCount = $application->restart_count ?? 0;
+
+ if ($maxRestartCount > $previousRestartCount) {
+ // Restart count increased - this is a crash restart
+ $application->update([
+ 'restart_count' => $maxRestartCount,
+ 'last_restart_at' => now(),
+ 'last_restart_type' => 'crash',
+ ]);
+
+ // Send notification
+ $containerName = $application->name;
+ $projectUuid = data_get($application, 'environment.project.uuid');
+ $environmentName = data_get($application, 'environment.name');
+ $applicationUuid = data_get($application, 'uuid');
+
+ if ($projectUuid && $applicationUuid && $environmentName) {
+ $url = base_url().'/project/'.$projectUuid.'/'.$environmentName.'/application/'.$applicationUuid;
+ } else {
+ $url = null;
+ }
+ }
+ }
+
+ // Aggregate status after tracking restart counts
+ $aggregatedStatus = $this->aggregateApplicationStatus($application, $containerStatuses, $maxRestartCount);
if ($aggregatedStatus) {
$statusFromDb = $application->status;
if ($statusFromDb !== $aggregatedStatus) {
@@ -355,7 +416,7 @@ public function handle(Server $server, ?Collection $containers = null, ?Collecti
ServiceChecked::dispatch($this->server->team->id);
}
- private function aggregateApplicationStatus($application, Collection $containerStatuses): ?string
+ private function aggregateApplicationStatus($application, Collection $containerStatuses, int $maxRestartCount = 0): ?string
{
// Parse docker compose to check for excluded containers
$dockerComposeRaw = data_get($application, 'docker_compose_raw');
@@ -413,6 +474,11 @@ private function aggregateApplicationStatus($application, Collection $containerS
return 'degraded (unhealthy)';
}
+ // If container is exited but has restart count > 0, it's in a crash loop
+ if ($hasExited && $maxRestartCount > 0) {
+ return 'degraded (unhealthy)';
+ }
+
if ($hasRunning && $hasExited) {
return 'degraded (unhealthy)';
}
@@ -421,7 +487,7 @@ private function aggregateApplicationStatus($application, Collection $containerS
return $hasUnhealthy ? 'running (unhealthy)' : 'running (healthy)';
}
- // All containers are exited
+ // All containers are exited with no restart count - truly stopped
return 'exited (unhealthy)';
}
}
diff --git a/app/Livewire/Project/Application/Heading.php b/app/Livewire/Project/Application/Heading.php
index 5231438e5..2c20926a3 100644
--- a/app/Livewire/Project/Application/Heading.php
+++ b/app/Livewire/Project/Application/Heading.php
@@ -94,6 +94,14 @@ public function deploy(bool $force_rebuild = false)
return;
}
+
+ // Reset restart count on deployment
+ $this->application->update([
+ 'restart_count' => 0,
+ 'last_restart_at' => null,
+ 'last_restart_type' => null,
+ ]);
+
$this->setDeploymentUuid();
$result = queue_application_deployment(
application: $this->application,
@@ -137,6 +145,14 @@ public function restart()
return;
}
+
+ // Reset restart count on manual restart
+ $this->application->update([
+ 'restart_count' => 0,
+ 'last_restart_at' => now(),
+ 'last_restart_type' => 'manual',
+ ]);
+
$this->setDeploymentUuid();
$result = queue_application_deployment(
application: $this->application,
diff --git a/app/Models/Application.php b/app/Models/Application.php
index 615e35f68..be340375f 100644
--- a/app/Models/Application.php
+++ b/app/Models/Application.php
@@ -121,6 +121,8 @@ class Application extends BaseModel
protected $casts = [
'http_basic_auth_password' => 'encrypted',
+ 'restart_count' => 'integer',
+ 'last_restart_at' => 'datetime',
];
protected static function booted()
diff --git a/database/migrations/2025_11_10_112500_add_restart_tracking_to_applications_table.php b/database/migrations/2025_11_10_112500_add_restart_tracking_to_applications_table.php
new file mode 100644
index 000000000..329ac7af9
--- /dev/null
+++ b/database/migrations/2025_11_10_112500_add_restart_tracking_to_applications_table.php
@@ -0,0 +1,30 @@
+<?php
+
+use Illuminate\Database\Migrations\Migration;
+use Illuminate\Database\Schema\Blueprint;
+use Illuminate\Support\Facades\Schema;
+
+return new class extends Migration
+{
+    /**
+     * Add the restart tracking columns (count, last restart time, last restart type) to applications.
+     */
+    public function up(): void
+    {
+        Schema::table('applications', function (Blueprint $table) {
+            $table->integer('restart_count')->default(0)->after('status');
+            $table->timestamp('last_restart_at')->nullable()->after('restart_count');
+            $table->string('last_restart_type', 10)->nullable()->after('last_restart_at');
+        });
+    }
+
+    /**
+     * Drop the three restart tracking columns again.
+     */
+    public function down(): void
+    {
+        Schema::table('applications', function (Blueprint $table) {
+            $table->dropColumn(['restart_count', 'last_restart_at', 'last_restart_type']);
+        });
+    }
+};
diff --git a/resources/views/components/status/index.blade.php b/resources/views/components/status/index.blade.php
index d592cff79..57e5409c6 100644
--- a/resources/views/components/status/index.blade.php
+++ b/resources/views/components/status/index.blade.php
@@ -12,6 +12,13 @@
@else
@endif
+@if (isset($resource->restart_count) && $resource->restart_count > 0 && !str($resource->status)->startsWith('exited'))
+
+
+ ({{ $resource->restart_count }}x restarts)
+
+
+@endif
@if (!str($resource->status)->contains('exited') && $showRefreshButton)