<?php
namespace App\Actions\Docker;
use App\Actions\Database\StartDatabaseProxy;
use App\Actions\Shared\ComplexStatusCheck;
use App\Events\ServiceChecked;
use App\Models\ApplicationPreview;
use App\Models\Server;
use App\Models\ServiceDatabase;
use Illuminate\Support\Arr;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
use Lorisleiva\Actions\Concerns\AsAction;
class GetContainersStatus
{
    use AsAction;

    // Queue name used when this action is dispatched as a job.
    public string $jobQueue = 'high';

    // Applications hosted on the server under inspection.
    public $applications;

    // Containers reported by Docker, or null when the query failed.
    public ?Collection $containers;

    // Swarm replica information accompanying the containers.
    public ?Collection $containerReplicates;

    // The server whose resources are being reconciled.
    public $server;

    // Per-application map: compose-service name => container status string.
    protected ?Collection $applicationContainerStatuses;

    // Per-application map: compose-service name => container restart count.
    protected ?Collection $applicationContainerRestartCounts;
public function handle(Server $server, ?Collection $containers = null, ?Collection $containerReplicates = null)
2024-05-07 13:41:50 +00:00
{
$this->containers = $containers;
$this->containerReplicates = $containerReplicates;
$this->server = $server;
2024-06-10 20:43:34 +00:00
if (! $this->server->isFunctional()) {
return 'Server is not functional.';
2024-06-10 20:43:34 +00:00
}
2024-05-07 13:41:50 +00:00
$this->applications = $this->server->applications();
$skip_these_applications = collect([]);
foreach ($this->applications as $application) {
if ($application->additional_servers->count() > 0) {
$skip_these_applications->push($application);
ComplexStatusCheck::run($application);
$this->applications = $this->applications->filter(function ($value, $key) use ($application) {
return $value->id !== $application->id;
});
}
}
$this->applications = $this->applications->filter(function ($value, $key) use ($skip_these_applications) {
2024-06-10 20:43:34 +00:00
return ! $skip_these_applications->pluck('id')->contains($value->id);
2024-05-07 13:41:50 +00:00
});
if ($this->containers === null) {
2024-10-22 10:01:46 +00:00
['containers' => $this->containers, 'containerReplicates' => $this->containerReplicates] = $this->server->getContainers();
2024-05-07 13:41:50 +00:00
}
if (is_null($this->containers)) {
return;
2024-05-07 13:41:50 +00:00
}
if ($this->containerReplicates) {
foreach ($this->containerReplicates as $containerReplica) {
$name = data_get($containerReplica, 'Name');
$this->containers = $this->containers->map(function ($container) use ($name, $containerReplica) {
2024-05-07 13:41:50 +00:00
if (data_get($container, 'Spec.Name') === $name) {
$replicas = data_get($containerReplica, 'Replicas');
2024-05-07 13:41:50 +00:00
$running = str($replicas)->explode('/')[0];
$total = str($replicas)->explode('/')[1];
if ($running === $total) {
data_set($container, 'State.Status', 'running');
data_set($container, 'State.Health.Status', 'healthy');
} else {
data_set($container, 'State.Status', 'starting');
data_set($container, 'State.Health.Status', 'unhealthy');
}
}
2024-06-10 20:43:34 +00:00
2024-05-07 13:41:50 +00:00
return $container;
});
}
}
$databases = $this->server->databases();
$services = $this->server->services()->get();
$previews = $this->server->previews();
$foundApplications = [];
$foundApplicationPreviews = [];
$foundDatabases = [];
$foundServices = [];
foreach ($this->containers as $container) {
2024-05-07 13:41:50 +00:00
if ($this->server->isSwarm()) {
$labels = data_get($container, 'Spec.Labels');
$uuid = data_get($labels, 'coolify.name');
} else {
$labels = data_get($container, 'Config.Labels');
}
$containerStatus = data_get($container, 'State.Status');
$containerHealth = data_get($container, 'State.Health.Status');
if ($containerStatus === 'restarting') {
$healthSuffix = $containerHealth ?? 'unknown';
$containerStatus = "restarting ($healthSuffix)";
} else {
$healthSuffix = $containerHealth ?? 'unknown';
$containerStatus = "$containerStatus ($healthSuffix)";
}
2024-05-07 13:41:50 +00:00
$labels = Arr::undot(format_docker_labels_to_json($labels));
$applicationId = data_get($labels, 'coolify.applicationId');
if ($applicationId) {
$pullRequestId = data_get($labels, 'coolify.pullRequestId');
if ($pullRequestId) {
if (str($applicationId)->contains('-')) {
$applicationId = str($applicationId)->before('-');
}
$preview = ApplicationPreview::where('application_id', $applicationId)->where('pull_request_id', $pullRequestId)->first();
2024-05-07 13:41:50 +00:00
if ($preview) {
$foundApplicationPreviews[] = $preview->id;
$statusFromDb = $preview->status;
if ($statusFromDb !== $containerStatus) {
$preview->update(['status' => $containerStatus]);
} else {
$preview->update(['last_online_at' => now()]);
2024-05-07 13:41:50 +00:00
}
} else {
2025-01-07 13:52:08 +00:00
// Notify user that this container should not be there.
2024-05-07 13:41:50 +00:00
}
} else {
$application = $this->applications->where('id', $applicationId)->first();
if ($application) {
$foundApplications[] = $application->id;
// Store container status for aggregation
if (! isset($this->applicationContainerStatuses)) {
$this->applicationContainerStatuses = collect();
}
if (! $this->applicationContainerStatuses->has($applicationId)) {
$this->applicationContainerStatuses->put($applicationId, collect());
}
$containerName = data_get($labels, 'com.docker.compose.service');
if ($containerName) {
$this->applicationContainerStatuses->get($applicationId)->put($containerName, $containerStatus);
2024-05-07 13:41:50 +00:00
}
// Track restart counts for applications
$restartCount = data_get($container, 'RestartCount', 0);
if (! isset($this->applicationContainerRestartCounts)) {
$this->applicationContainerRestartCounts = collect();
}
if (! $this->applicationContainerRestartCounts->has($applicationId)) {
$this->applicationContainerRestartCounts->put($applicationId, collect());
}
if ($containerName) {
$this->applicationContainerRestartCounts->get($applicationId)->put($containerName, $restartCount);
}
2024-05-07 13:41:50 +00:00
} else {
2025-01-07 13:52:08 +00:00
// Notify user that this container should not be there.
2024-05-07 13:41:50 +00:00
}
}
} else {
$uuid = data_get($labels, 'com.docker.compose.service');
$type = data_get($labels, 'coolify.type');
if ($uuid) {
if ($type === 'service') {
$database_id = data_get($labels, 'coolify.service.subId');
if ($database_id) {
$service_db = ServiceDatabase::where('id', $database_id)->first();
2024-05-07 13:41:50 +00:00
if ($service_db) {
2024-05-21 12:29:06 +00:00
$uuid = data_get($service_db, 'service.uuid');
if ($uuid) {
$isPublic = data_get($service_db, 'is_public');
if ($isPublic) {
$foundTcpProxy = $this->containers->filter(function ($value, $key) use ($uuid) {
2024-05-21 12:29:06 +00:00
if ($this->server->isSwarm()) {
return data_get($value, 'Spec.Name') === "coolify-proxy_$uuid";
} else {
return data_get($value, 'Name') === "/$uuid-proxy";
2024-05-21 12:29:06 +00:00
}
})->first();
2024-06-10 20:43:34 +00:00
if (! $foundTcpProxy) {
2024-05-21 12:29:06 +00:00
StartDatabaseProxy::run($service_db);
// $this->server->team?->notify(new ContainerRestarted("TCP Proxy for {$service_db->service->name}", $this->server));
2024-05-07 13:41:50 +00:00
}
}
}
}
}
} else {
$database = $databases->where('uuid', $uuid)->first();
if ($database) {
$isPublic = data_get($database, 'is_public');
$foundDatabases[] = $database->id;
$statusFromDb = $database->status;
if ($statusFromDb !== $containerStatus) {
$database->update(['status' => $containerStatus]);
} else {
$database->update(['last_online_at' => now()]);
2024-05-07 13:41:50 +00:00
}
2024-05-07 13:41:50 +00:00
if ($isPublic) {
$foundTcpProxy = $this->containers->filter(function ($value, $key) use ($uuid) {
2024-05-07 13:41:50 +00:00
if ($this->server->isSwarm()) {
return data_get($value, 'Spec.Name') === "coolify-proxy_$uuid";
} else {
return data_get($value, 'Name') === "/$uuid-proxy";
2024-05-07 13:41:50 +00:00
}
})->first();
2024-06-10 20:43:34 +00:00
if (! $foundTcpProxy) {
2024-05-07 13:41:50 +00:00
StartDatabaseProxy::run($database);
2024-12-02 21:49:41 +00:00
// $this->server->team?->notify(new ContainerRestarted("TCP Proxy for database", $this->server));
2024-05-07 13:41:50 +00:00
}
}
} else {
// Notify user that this container should not be there.
}
}
}
if (data_get($container, 'Name') === '/coolify-db') {
$foundDatabases[] = 0;
}
}
$serviceLabelId = data_get($labels, 'coolify.serviceId');
if ($serviceLabelId) {
$subType = data_get($labels, 'coolify.service.subType');
$subId = data_get($labels, 'coolify.service.subId');
$parentService = $services->where('id', $serviceLabelId)->first();
if (! $parentService) {
2024-05-07 13:41:50 +00:00
continue;
}
// Check if this container is excluded from health checks
$containerName = data_get($labels, 'com.docker.compose.service');
$isExcluded = false;
if ($containerName) {
$dockerComposeRaw = data_get($parentService, 'docker_compose_raw');
if ($dockerComposeRaw) {
try {
$dockerCompose = \Symfony\Component\Yaml\Yaml::parse($dockerComposeRaw);
$serviceConfig = data_get($dockerCompose, "services.{$containerName}", []);
$excludeFromHc = data_get($serviceConfig, 'exclude_from_hc', false);
$restartPolicy = data_get($serviceConfig, 'restart', 'always');
if ($excludeFromHc || $restartPolicy === 'no') {
$isExcluded = true;
}
} catch (\Exception $e) {
// If we can't parse, treat as not excluded
}
}
}
// Append :excluded suffix if container is excluded
if ($isExcluded) {
$containerStatus = str_replace(')', ':excluded)', $containerStatus);
}
2024-05-07 13:41:50 +00:00
if ($subType === 'application') {
$service = $parentService->applications()->where('id', $subId)->first();
2024-05-07 13:41:50 +00:00
} else {
$service = $parentService->databases()->where('id', $subId)->first();
2024-05-07 13:41:50 +00:00
}
if ($service) {
$foundServices[] = "$service->id-$service->name";
$statusFromDb = $service->status;
if ($statusFromDb !== $containerStatus) {
$service->update(['status' => $containerStatus]);
} else {
$service->update(['last_online_at' => now()]);
2024-05-07 13:41:50 +00:00
}
}
}
}
$exitedServices = collect([]);
foreach ($services as $service) {
$apps = $service->applications()->get();
$dbs = $service->databases()->get();
foreach ($apps as $app) {
if (in_array("$app->id-$app->name", $foundServices)) {
continue;
} else {
$exitedServices->push($app);
2024-05-07 13:41:50 +00:00
}
}
foreach ($dbs as $db) {
if (in_array("$db->id-$db->name", $foundServices)) {
continue;
} else {
$exitedServices->push($db);
2024-05-07 13:41:50 +00:00
}
}
}
2024-09-24 18:40:41 +00:00
$exitedServices = $exitedServices->unique('uuid');
2024-05-07 13:41:50 +00:00
foreach ($exitedServices as $exitedService) {
if (str($exitedService->status)->startsWith('exited')) {
continue;
}
$name = data_get($exitedService, 'name');
$fqdn = data_get($exitedService, 'fqdn');
if ($name) {
if ($fqdn) {
$containerName = "$name, available at $fqdn";
} else {
$containerName = $name;
}
} else {
if ($fqdn) {
$containerName = $fqdn;
} else {
$containerName = null;
}
}
2024-05-07 13:41:50 +00:00
$projectUuid = data_get($service, 'environment.project.uuid');
$serviceUuid = data_get($service, 'uuid');
$environmentName = data_get($service, 'environment.name');
if ($projectUuid && $serviceUuid && $environmentName) {
2024-12-02 21:49:55 +00:00
$url = base_url().'/project/'.$projectUuid.'/'.$environmentName.'/service/'.$serviceUuid;
2024-05-07 13:41:50 +00:00
} else {
$url = null;
}
// $this->server->team?->notify(new ContainerStopped($containerName, $this->server, $url));
2024-05-07 13:41:50 +00:00
$exitedService->update(['status' => 'exited']);
}
$notRunningApplications = $this->applications->pluck('id')->diff($foundApplications);
foreach ($notRunningApplications as $applicationId) {
$application = $this->applications->where('id', $applicationId)->first();
2024-05-07 13:41:50 +00:00
if (str($application->status)->startsWith('exited')) {
continue;
}
// Only protection: If no containers at all, Docker query might have failed
if ($this->containers->isEmpty()) {
continue;
2024-05-07 13:41:50 +00:00
}
// If container was recently restarting (crash loop), keep it as degraded for a grace period
// This prevents false "exited" status during the brief moment between container removal and recreation
$recentlyRestarted = $application->restart_count > 0 &&
$application->last_restart_at &&
$application->last_restart_at->greaterThan(now()->subSeconds(30));
if ($recentlyRestarted) {
// Keep it as degraded if it was recently in a crash loop
debug: add comprehensive status change logging Added detailed debug logging to all status update paths to help diagnose why "unhealthy" status appears in the UI. ## Logging Added ### 1. PushServerUpdateJob (Sentinel updates) **Location**: Lines 303-315 **Logs**: Status changes from Sentinel push updates **Data tracked**: - Old vs new status - Container statuses that led to aggregation - Status flags (hasRunning, hasUnhealthy, hasUnknown) ### 2. GetContainersStatus (SSH updates) **Location**: Lines 441-449, 346-354, 358-365 **Logs**: Status changes from SSH-based checks **Scenarios**: - Normal status aggregation - Recently restarted containers (kept as degraded) - Applications not running (set to exited) **Data tracked**: - Old vs new status - Container statuses - Restart count and timing - Whether containers exist ### 3. Application Model Status Accessor **Location**: Lines 706-712, 726-732 **Logs**: When status is set without explicit health information **Issue**: Highlights cases where health defaults to "unhealthy" **Data tracked**: - Raw value passed to setter - Final result after default applied ## How to Use ### Enable Debug Logging Edit `.env` or `config/logging.php` to set log level to debug: ``` LOG_LEVEL=debug ``` ### Monitor Logs ```bash tail -f storage/logs/laravel.log | grep STATUS-DEBUG ``` ### Log Format All logs use `[STATUS-DEBUG]` prefix for easy filtering: ``` [2025-11-19 13:00:00] local.DEBUG: [STATUS-DEBUG] Sentinel status change { "source": "PushServerUpdateJob", "app_id": 123, "app_name": "my-app", "old_status": "running:unknown", "new_status": "running:healthy", "container_statuses": [...], "flags": {...} } ``` ## What to Look For 1. **Default to unhealthy**: Check Application model accessor logs 2. **Status flipping**: Compare timestamps between Sentinel and SSH updates 3. **Incorrect aggregation**: Check flags and container_statuses 4. 
**Stale database values**: Check if old_status persists across multiple logs ## Next Steps After gathering logs, we can: 1. Identify the exact source of "unhealthy" status 2. Determine if it's a default issue, aggregation bug, or timing problem 3. Apply targeted fix based on evidence 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-19 12:52:08 +00:00
Log::debug('[STATUS-DEBUG] Recently restarted - keeping degraded', [
'source' => 'GetContainersStatus (not running)',
'app_id' => $application->id,
'app_name' => $application->name,
'old_status' => $application->status,
'new_status' => 'degraded (unhealthy)',
'restart_count' => $application->restart_count,
'last_restart_at' => $application->last_restart_at,
]);
$application->update(['status' => 'degraded (unhealthy)']);
} else {
// Reset restart count when application exits completely
debug: add comprehensive status change logging Added detailed debug logging to all status update paths to help diagnose why "unhealthy" status appears in the UI. ## Logging Added ### 1. PushServerUpdateJob (Sentinel updates) **Location**: Lines 303-315 **Logs**: Status changes from Sentinel push updates **Data tracked**: - Old vs new status - Container statuses that led to aggregation - Status flags (hasRunning, hasUnhealthy, hasUnknown) ### 2. GetContainersStatus (SSH updates) **Location**: Lines 441-449, 346-354, 358-365 **Logs**: Status changes from SSH-based checks **Scenarios**: - Normal status aggregation - Recently restarted containers (kept as degraded) - Applications not running (set to exited) **Data tracked**: - Old vs new status - Container statuses - Restart count and timing - Whether containers exist ### 3. Application Model Status Accessor **Location**: Lines 706-712, 726-732 **Logs**: When status is set without explicit health information **Issue**: Highlights cases where health defaults to "unhealthy" **Data tracked**: - Raw value passed to setter - Final result after default applied ## How to Use ### Enable Debug Logging Edit `.env` or `config/logging.php` to set log level to debug: ``` LOG_LEVEL=debug ``` ### Monitor Logs ```bash tail -f storage/logs/laravel.log | grep STATUS-DEBUG ``` ### Log Format All logs use `[STATUS-DEBUG]` prefix for easy filtering: ``` [2025-11-19 13:00:00] local.DEBUG: [STATUS-DEBUG] Sentinel status change { "source": "PushServerUpdateJob", "app_id": 123, "app_name": "my-app", "old_status": "running:unknown", "new_status": "running:healthy", "container_statuses": [...], "flags": {...} } ``` ## What to Look For 1. **Default to unhealthy**: Check Application model accessor logs 2. **Status flipping**: Compare timestamps between Sentinel and SSH updates 3. **Incorrect aggregation**: Check flags and container_statuses 4. 
**Stale database values**: Check if old_status persists across multiple logs ## Next Steps After gathering logs, we can: 1. Identify the exact source of "unhealthy" status 2. Determine if it's a default issue, aggregation bug, or timing problem 3. Apply targeted fix based on evidence 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-19 12:52:08 +00:00
Log::debug('[STATUS-DEBUG] Application not running', [
'source' => 'GetContainersStatus (not running)',
'app_id' => $application->id,
'app_name' => $application->name,
'old_status' => $application->status,
'new_status' => 'exited',
'containers_exist' => ! $this->containers->isEmpty(),
]);
$application->update([
'status' => 'exited',
'restart_count' => 0,
'last_restart_at' => null,
'last_restart_type' => null,
]);
}
2024-05-07 13:41:50 +00:00
}
$notRunningApplicationPreviews = $previews->pluck('id')->diff($foundApplicationPreviews);
foreach ($notRunningApplicationPreviews as $previewId) {
$preview = $previews->where('id', $previewId)->first();
2024-05-07 13:41:50 +00:00
if (str($preview->status)->startsWith('exited')) {
continue;
}
// Only protection: If no containers at all, Docker query might have failed
if ($this->containers->isEmpty()) {
continue;
2024-05-07 13:41:50 +00:00
}
$preview->update(['status' => 'exited']);
2024-05-07 13:41:50 +00:00
}
$notRunningDatabases = $databases->pluck('id')->diff($foundDatabases);
foreach ($notRunningDatabases as $database) {
$database = $databases->where('id', $database)->first();
if (str($database->status)->startsWith('exited')) {
2024-05-07 13:41:50 +00:00
continue;
}
$database->update(['status' => 'exited']);
2024-05-07 13:41:50 +00:00
$name = data_get($database, 'name');
$fqdn = data_get($database, 'fqdn');
2024-05-07 13:41:50 +00:00
$containerName = $name;
$projectUuid = data_get($database, 'environment.project.uuid');
$environmentName = data_get($database, 'environment.name');
$databaseUuid = data_get($database, 'uuid');
2024-05-07 13:41:50 +00:00
if ($projectUuid && $databaseUuid && $environmentName) {
2024-12-02 21:49:55 +00:00
$url = base_url().'/project/'.$projectUuid.'/'.$environmentName.'/database/'.$databaseUuid;
2024-05-07 13:41:50 +00:00
} else {
$url = null;
}
// $this->server->team?->notify(new ContainerStopped($containerName, $this->server, $url));
2024-05-07 13:41:50 +00:00
}
// Aggregate multi-container application statuses
if (isset($this->applicationContainerStatuses) && $this->applicationContainerStatuses->isNotEmpty()) {
foreach ($this->applicationContainerStatuses as $applicationId => $containerStatuses) {
$application = $this->applications->where('id', $applicationId)->first();
if (! $application) {
continue;
}
// Track restart counts first
$maxRestartCount = 0;
if (isset($this->applicationContainerRestartCounts) && $this->applicationContainerRestartCounts->has($applicationId)) {
$containerRestartCounts = $this->applicationContainerRestartCounts->get($applicationId);
$maxRestartCount = $containerRestartCounts->max() ?? 0;
}
// Wrap all database updates in a transaction to ensure consistency
DB::transaction(function () use ($application, $maxRestartCount, $containerStatuses) {
$previousRestartCount = $application->restart_count ?? 0;
if ($maxRestartCount > $previousRestartCount) {
// Restart count increased - this is a crash restart
$application->update([
'restart_count' => $maxRestartCount,
'last_restart_at' => now(),
'last_restart_type' => 'crash',
]);
// Send notification
$containerName = $application->name;
$projectUuid = data_get($application, 'environment.project.uuid');
$environmentName = data_get($application, 'environment.name');
$applicationUuid = data_get($application, 'uuid');
if ($projectUuid && $applicationUuid && $environmentName) {
$url = base_url().'/project/'.$projectUuid.'/'.$environmentName.'/application/'.$applicationUuid;
} else {
$url = null;
}
}
// Aggregate status after tracking restart counts
$aggregatedStatus = $this->aggregateApplicationStatus($application, $containerStatuses, $maxRestartCount);
if ($aggregatedStatus) {
$statusFromDb = $application->status;
if ($statusFromDb !== $aggregatedStatus) {
debug: add comprehensive status change logging Added detailed debug logging to all status update paths to help diagnose why "unhealthy" status appears in the UI. ## Logging Added ### 1. PushServerUpdateJob (Sentinel updates) **Location**: Lines 303-315 **Logs**: Status changes from Sentinel push updates **Data tracked**: - Old vs new status - Container statuses that led to aggregation - Status flags (hasRunning, hasUnhealthy, hasUnknown) ### 2. GetContainersStatus (SSH updates) **Location**: Lines 441-449, 346-354, 358-365 **Logs**: Status changes from SSH-based checks **Scenarios**: - Normal status aggregation - Recently restarted containers (kept as degraded) - Applications not running (set to exited) **Data tracked**: - Old vs new status - Container statuses - Restart count and timing - Whether containers exist ### 3. Application Model Status Accessor **Location**: Lines 706-712, 726-732 **Logs**: When status is set without explicit health information **Issue**: Highlights cases where health defaults to "unhealthy" **Data tracked**: - Raw value passed to setter - Final result after default applied ## How to Use ### Enable Debug Logging Edit `.env` or `config/logging.php` to set log level to debug: ``` LOG_LEVEL=debug ``` ### Monitor Logs ```bash tail -f storage/logs/laravel.log | grep STATUS-DEBUG ``` ### Log Format All logs use `[STATUS-DEBUG]` prefix for easy filtering: ``` [2025-11-19 13:00:00] local.DEBUG: [STATUS-DEBUG] Sentinel status change { "source": "PushServerUpdateJob", "app_id": 123, "app_name": "my-app", "old_status": "running:unknown", "new_status": "running:healthy", "container_statuses": [...], "flags": {...} } ``` ## What to Look For 1. **Default to unhealthy**: Check Application model accessor logs 2. **Status flipping**: Compare timestamps between Sentinel and SSH updates 3. **Incorrect aggregation**: Check flags and container_statuses 4. 
**Stale database values**: Check if old_status persists across multiple logs ## Next Steps After gathering logs, we can: 1. Identify the exact source of "unhealthy" status 2. Determine if it's a default issue, aggregation bug, or timing problem 3. Apply targeted fix based on evidence 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-19 12:52:08 +00:00
Log::debug('[STATUS-DEBUG] SSH status change', [
'source' => 'GetContainersStatus',
'app_id' => $application->id,
'app_name' => $application->name,
'old_status' => $statusFromDb,
'new_status' => $aggregatedStatus,
'container_statuses' => $containerStatuses->toArray(),
'max_restart_count' => $maxRestartCount,
]);
$application->update(['status' => $aggregatedStatus]);
} else {
$application->update(['last_online_at' => now()]);
}
}
});
}
}
ServiceChecked::dispatch($this->server->team->id);
2024-05-07 13:41:50 +00:00
}
private function aggregateApplicationStatus($application, Collection $containerStatuses, int $maxRestartCount = 0): ?string
{
// Parse docker compose to check for excluded containers
$dockerComposeRaw = data_get($application, 'docker_compose_raw');
$excludedContainers = collect();
if ($dockerComposeRaw) {
try {
$dockerCompose = \Symfony\Component\Yaml\Yaml::parse($dockerComposeRaw);
$services = data_get($dockerCompose, 'services', []);
foreach ($services as $serviceName => $serviceConfig) {
// Check if container should be excluded
$excludeFromHc = data_get($serviceConfig, 'exclude_from_hc', false);
$restartPolicy = data_get($serviceConfig, 'restart', 'always');
if ($excludeFromHc || $restartPolicy === 'no') {
$excludedContainers->push($serviceName);
}
}
} catch (\Exception $e) {
// If we can't parse, treat all containers as included
}
}
// Filter out excluded containers
$relevantStatuses = $containerStatuses->filter(function ($status, $containerName) use ($excludedContainers) {
return ! $excludedContainers->contains($containerName);
});
// If all containers are excluded, don't update status
if ($relevantStatuses->isEmpty()) {
return null;
}
$hasRunning = false;
$hasRestarting = false;
$hasUnhealthy = false;
$hasUnknown = false;
$hasExited = false;
$hasStarting = false;
$hasPaused = false;
$hasDead = false;
foreach ($relevantStatuses as $status) {
if (str($status)->contains('restarting')) {
$hasRestarting = true;
} elseif (str($status)->contains('running')) {
$hasRunning = true;
if (str($status)->contains('unhealthy')) {
$hasUnhealthy = true;
}
if (str($status)->contains('unknown')) {
$hasUnknown = true;
}
} elseif (str($status)->contains('exited')) {
$hasExited = true;
$hasUnhealthy = true;
} elseif (str($status)->contains('created') || str($status)->contains('starting')) {
$hasStarting = true;
} elseif (str($status)->contains('paused')) {
$hasPaused = true;
} elseif (str($status)->contains('dead') || str($status)->contains('removing')) {
$hasDead = true;
}
}
if ($hasRestarting) {
return 'degraded (unhealthy)';
}
// If container is exited but has restart count > 0, it's in a crash loop
if ($hasExited && $maxRestartCount > 0) {
return 'degraded (unhealthy)';
}
if ($hasRunning && $hasExited) {
return 'degraded (unhealthy)';
}
if ($hasRunning) {
if ($hasUnhealthy) {
return 'running (unhealthy)';
} elseif ($hasUnknown) {
return 'running (unknown)';
} else {
return 'running (healthy)';
}
}
if ($hasDead) {
return 'degraded (unhealthy)';
}
if ($hasPaused) {
return 'paused (unknown)';
}
if ($hasStarting) {
return 'starting (unknown)';
}
// All containers are exited with no restart count - truly stopped
return 'exited (unhealthy)';
}
2024-05-07 13:41:50 +00:00
}