coolify/app/Services/ContainerStatusAggregator.php
Andras Bacsai 66e81d6d96 Fix container status display: preserve "Restarting" for applications and sub-resources
Add preserveRestarting parameter to ContainerStatusAggregator to allow applications
and service sub-resources to display "Restarting" status instead of being marked as
"Degraded". This gives better visibility into container restart behavior.

- Update ContainerStatusAggregator to accept preserveRestarting parameter (defaults to false)
- Update GetContainersStatus to use preserveRestarting: true for applications and service sub-resources
- Update PushServerUpdateJob to use preserveRestarting: true for applications and service sub-resources
- Add comprehensive documentation explaining the parameter behavior and when to use it

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-03 08:23:35 +01:00

290 lines
11 KiB
PHP

<?php
namespace App\Services;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Log;
/**
* Container Status Aggregator Service
*
* Centralized service for aggregating container statuses into a single status string.
* Uses a priority-based state machine to determine the overall status from multiple containers.
*
* Output Format: Colon-separated (e.g., "running:healthy", "degraded:unhealthy")
* This format is used throughout the backend for consistency and machine-readability.
* UI components transform this to human-readable format (e.g., "Running (Healthy)").
*
* State Priority (highest to lowest):
* 1. Degraded (from sub-resources) → degraded:unhealthy
* 2. Restarting → degraded:unhealthy (or restarting:unknown if preserveRestarting=true)
* 3. Crash Loop (exited with restarts) → degraded:unhealthy
* 4. Mixed (running + exited) → degraded:unhealthy
* 5. Mixed (running + starting) → starting:unknown
* 6. Running → running:healthy/unhealthy/unknown
* 7. Dead/Removing → degraded:unhealthy
* 8. Paused → paused:unknown
* 9. Starting/Created → starting:unknown
* 10. Exited → exited
*
* The $preserveRestarting parameter controls whether "restarting" containers should be
* reported as "restarting:unknown" (true) or "degraded:unhealthy" (false, default).
* - Use preserveRestarting=true for individual sub-resources (ServiceApplication/ServiceDatabase)
* so they show "Restarting" in the UI.
* - Use preserveRestarting=false for overall Service status aggregation where any restarting
* container should mark the entire service as "Degraded".
*/
class ContainerStatusAggregator
{
/**
* Aggregate container statuses from status strings into a single status.
*
* @param Collection $containerStatuses Collection of status strings (e.g., "running (healthy)", "running:healthy")
* @param int $maxRestartCount Maximum restart count across containers (for crash loop detection)
* @param bool $preserveRestarting If true, "restarting" containers return "restarting:unknown" instead of "degraded:unhealthy"
* @return string Aggregated status in colon format (e.g., "running:healthy")
*/
public function aggregateFromStrings(Collection $containerStatuses, int $maxRestartCount = 0, bool $preserveRestarting = false): string
{
// Validate maxRestartCount parameter
if ($maxRestartCount < 0) {
Log::warning('Negative maxRestartCount corrected to 0', [
'original_value' => $maxRestartCount,
]);
$maxRestartCount = 0;
}
if ($maxRestartCount > 1000) {
Log::warning('High maxRestartCount detected', [
'maxRestartCount' => $maxRestartCount,
'containers' => $containerStatuses->count(),
]);
}
if ($containerStatuses->isEmpty()) {
return 'exited';
}
// Initialize state flags
$hasRunning = false;
$hasRestarting = false;
$hasUnhealthy = false;
$hasUnknown = false;
$hasExited = false;
$hasStarting = false;
$hasPaused = false;
$hasDead = false;
$hasDegraded = false;
// Parse each status string and set flags
foreach ($containerStatuses as $status) {
if (str($status)->contains('degraded')) {
$hasDegraded = true;
if (str($status)->contains('unhealthy')) {
$hasUnhealthy = true;
}
} elseif (str($status)->contains('restarting')) {
$hasRestarting = true;
} elseif (str($status)->contains('running')) {
$hasRunning = true;
if (str($status)->contains('unhealthy')) {
$hasUnhealthy = true;
}
if (str($status)->contains('unknown')) {
$hasUnknown = true;
}
} elseif (str($status)->contains('exited')) {
$hasExited = true;
} elseif (str($status)->contains('created') || str($status)->contains('starting')) {
$hasStarting = true;
} elseif (str($status)->contains('paused')) {
$hasPaused = true;
} elseif (str($status)->contains('dead') || str($status)->contains('removing')) {
$hasDead = true;
}
}
// Priority-based status resolution
return $this->resolveStatus(
$hasRunning,
$hasRestarting,
$hasUnhealthy,
$hasUnknown,
$hasExited,
$hasStarting,
$hasPaused,
$hasDead,
$hasDegraded,
$maxRestartCount,
$preserveRestarting
);
}
/**
* Aggregate container statuses from Docker container objects.
*
* @param Collection $containers Collection of Docker container objects with State property
* @param int $maxRestartCount Maximum restart count across containers (for crash loop detection)
* @param bool $preserveRestarting If true, "restarting" containers return "restarting:unknown" instead of "degraded:unhealthy"
* @return string Aggregated status in colon format (e.g., "running:healthy")
*/
public function aggregateFromContainers(Collection $containers, int $maxRestartCount = 0, bool $preserveRestarting = false): string
{
// Validate maxRestartCount parameter
if ($maxRestartCount < 0) {
Log::warning('Negative maxRestartCount corrected to 0', [
'original_value' => $maxRestartCount,
]);
$maxRestartCount = 0;
}
if ($maxRestartCount > 1000) {
Log::warning('High maxRestartCount detected', [
'maxRestartCount' => $maxRestartCount,
'containers' => $containers->count(),
]);
}
if ($containers->isEmpty()) {
return 'exited';
}
// Initialize state flags
$hasRunning = false;
$hasRestarting = false;
$hasUnhealthy = false;
$hasUnknown = false;
$hasExited = false;
$hasStarting = false;
$hasPaused = false;
$hasDead = false;
// Parse each container object and set flags
foreach ($containers as $container) {
$state = data_get($container, 'State.Status', 'exited');
$health = data_get($container, 'State.Health.Status');
if ($state === 'restarting') {
$hasRestarting = true;
} elseif ($state === 'running') {
$hasRunning = true;
if ($health === 'unhealthy') {
$hasUnhealthy = true;
} elseif (is_null($health) || $health === 'starting') {
$hasUnknown = true;
}
} elseif ($state === 'exited') {
$hasExited = true;
} elseif ($state === 'created' || $state === 'starting') {
$hasStarting = true;
} elseif ($state === 'paused') {
$hasPaused = true;
} elseif ($state === 'dead' || $state === 'removing') {
$hasDead = true;
}
}
// Priority-based status resolution
return $this->resolveStatus(
$hasRunning,
$hasRestarting,
$hasUnhealthy,
$hasUnknown,
$hasExited,
$hasStarting,
$hasPaused,
$hasDead,
false, // $hasDegraded - not applicable for container objects, only for status strings
$maxRestartCount,
$preserveRestarting
);
}
/**
* Resolve the aggregated status based on state flags (priority-based state machine).
*
* @param bool $hasRunning Has at least one running container
* @param bool $hasRestarting Has at least one restarting container
* @param bool $hasUnhealthy Has at least one unhealthy container
* @param bool $hasUnknown Has at least one container with unknown health
* @param bool $hasExited Has at least one exited container
* @param bool $hasStarting Has at least one starting/created container
* @param bool $hasPaused Has at least one paused container
* @param bool $hasDead Has at least one dead/removing container
* @param bool $hasDegraded Has at least one degraded container
* @param int $maxRestartCount Maximum restart count (for crash loop detection)
* @param bool $preserveRestarting If true, return "restarting:unknown" instead of "degraded:unhealthy" for restarting containers
* @return string Status in colon format (e.g., "running:healthy")
*/
private function resolveStatus(
bool $hasRunning,
bool $hasRestarting,
bool $hasUnhealthy,
bool $hasUnknown,
bool $hasExited,
bool $hasStarting,
bool $hasPaused,
bool $hasDead,
bool $hasDegraded,
int $maxRestartCount,
bool $preserveRestarting = false
): string {
// Priority 1: Degraded containers from sub-resources (highest priority)
// If any service/application within a service stack is degraded, the entire stack is degraded
if ($hasDegraded) {
return 'degraded:unhealthy';
}
// Priority 2: Restarting containers
// When preserveRestarting is true (for individual sub-resources), keep as "restarting"
// When false (for overall service status), mark as "degraded"
if ($hasRestarting) {
return $preserveRestarting ? 'restarting:unknown' : 'degraded:unhealthy';
}
// Priority 3: Crash loop detection (exited with restart count > 0)
if ($hasExited && $maxRestartCount > 0) {
return 'degraded:unhealthy';
}
// Priority 4: Mixed state (some running, some exited = degraded)
if ($hasRunning && $hasExited) {
return 'degraded:unhealthy';
}
// Priority 5: Mixed state (some running, some starting = still starting)
// If any component is still starting, the entire service stack is not fully ready
if ($hasRunning && $hasStarting) {
return 'starting:unknown';
}
// Priority 6: Running containers (check health status)
if ($hasRunning) {
if ($hasUnhealthy) {
return 'running:unhealthy';
} elseif ($hasUnknown) {
return 'running:unknown';
} else {
return 'running:healthy';
}
}
// Priority 7: Dead or removing containers
if ($hasDead) {
return 'degraded:unhealthy';
}
// Priority 8: Paused containers
if ($hasPaused) {
return 'paused:unknown';
}
// Priority 9: Starting/created containers
if ($hasStarting) {
return 'starting:unknown';
}
// Priority 10: All containers exited (no restart count = truly stopped)
return 'exited';
}
}