fix(deployment): prevent base deployments from being killed when PRs close (#7113)
- Fix container filtering to properly distinguish base deployments (pullRequestId=0) from PR deployments - Add deployment cancellation when PR closes via webhook to prevent race conditions - Prevent CleanupHelperContainersJob from killing active deployment containers - Enhance error messages with exit codes and actual errors instead of vague "Oops" messages - Protect status transitions in finally blocks to ensure proper job failure handling
This commit is contained in:
parent
775216e7a5
commit
67605d50fc
8 changed files with 269 additions and 40 deletions
|
|
@ -246,6 +246,50 @@ public function manual(Request $request)
|
|||
if ($action === 'closed') {
|
||||
$found = ApplicationPreview::where('application_id', $application->id)->where('pull_request_id', $pull_request_id)->first();
|
||||
if ($found) {
|
||||
// Cancel any active deployments for this PR immediately
|
||||
$activeDeployment = \App\Models\ApplicationDeploymentQueue::where('application_id', $application->id)
|
||||
->where('pull_request_id', $pull_request_id)
|
||||
->whereIn('status', [
|
||||
\App\Enums\ApplicationDeploymentStatus::QUEUED->value,
|
||||
\App\Enums\ApplicationDeploymentStatus::IN_PROGRESS->value,
|
||||
])
|
||||
->first();
|
||||
|
||||
if ($activeDeployment) {
|
||||
try {
|
||||
// Mark deployment as cancelled
|
||||
$activeDeployment->update([
|
||||
'status' => \App\Enums\ApplicationDeploymentStatus::CANCELLED_BY_USER->value,
|
||||
]);
|
||||
|
||||
// Add cancellation log entry
|
||||
$activeDeployment->addLogEntry('Deployment cancelled: Pull request closed.', 'stderr');
|
||||
|
||||
// Check if helper container exists and kill it
|
||||
$deployment_uuid = $activeDeployment->deployment_uuid;
|
||||
$server = $application->destination->server;
|
||||
$checkCommand = "docker ps -a --filter name={$deployment_uuid} --format '{{.Names}}'";
|
||||
$containerExists = instant_remote_process([$checkCommand], $server);
|
||||
|
||||
if ($containerExists && str($containerExists)->trim()->isNotEmpty()) {
|
||||
instant_remote_process(["docker rm -f {$deployment_uuid}"], $server);
|
||||
$activeDeployment->addLogEntry('Deployment container stopped.');
|
||||
}
|
||||
|
||||
// Kill running process if process ID exists
|
||||
if ($activeDeployment->current_process_id) {
|
||||
try {
|
||||
$processKillCommand = "kill -9 {$activeDeployment->current_process_id}";
|
||||
instant_remote_process([$processKillCommand], $server);
|
||||
} catch (\Throwable $e) {
|
||||
// Process might already be gone
|
||||
}
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
// Silently handle errors during deployment cancellation
|
||||
}
|
||||
}
|
||||
|
||||
DeleteResourceJob::dispatch($found);
|
||||
$return_payloads->push([
|
||||
'application' => $application->name,
|
||||
|
|
@ -481,6 +525,51 @@ public function normal(Request $request)
|
|||
if ($action === 'closed' || $action === 'close') {
|
||||
$found = ApplicationPreview::where('application_id', $application->id)->where('pull_request_id', $pull_request_id)->first();
|
||||
if ($found) {
|
||||
// Cancel any active deployments for this PR immediately
|
||||
$activeDeployment = \App\Models\ApplicationDeploymentQueue::where('application_id', $application->id)
|
||||
->where('pull_request_id', $pull_request_id)
|
||||
->whereIn('status', [
|
||||
\App\Enums\ApplicationDeploymentStatus::QUEUED->value,
|
||||
\App\Enums\ApplicationDeploymentStatus::IN_PROGRESS->value,
|
||||
])
|
||||
->first();
|
||||
|
||||
if ($activeDeployment) {
|
||||
try {
|
||||
// Mark deployment as cancelled
|
||||
$activeDeployment->update([
|
||||
'status' => \App\Enums\ApplicationDeploymentStatus::CANCELLED_BY_USER->value,
|
||||
]);
|
||||
|
||||
// Add cancellation log entry
|
||||
$activeDeployment->addLogEntry('Deployment cancelled: Pull request closed.', 'stderr');
|
||||
|
||||
// Check if helper container exists and kill it
|
||||
$deployment_uuid = $activeDeployment->deployment_uuid;
|
||||
$server = $application->destination->server;
|
||||
$checkCommand = "docker ps -a --filter name={$deployment_uuid} --format '{{.Names}}'";
|
||||
$containerExists = instant_remote_process([$checkCommand], $server);
|
||||
|
||||
if ($containerExists && str($containerExists)->trim()->isNotEmpty()) {
|
||||
instant_remote_process(["docker rm -f {$deployment_uuid}"], $server);
|
||||
$activeDeployment->addLogEntry('Deployment container stopped.');
|
||||
}
|
||||
|
||||
// Kill running process if process ID exists
|
||||
if ($activeDeployment->current_process_id) {
|
||||
try {
|
||||
$processKillCommand = "kill -9 {$activeDeployment->current_process_id}";
|
||||
instant_remote_process([$processKillCommand], $server);
|
||||
} catch (\Throwable $e) {
|
||||
// Process might already be gone
|
||||
}
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
// Silently handle errors during deployment cancellation
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up any deployed containers
|
||||
$containers = getCurrentApplicationContainerStatus($application->destination->server, $application->id, $pull_request_id);
|
||||
if ($containers->isNotEmpty()) {
|
||||
$containers->each(function ($container) use ($application) {
|
||||
|
|
|
|||
|
|
@ -341,20 +341,42 @@ public function handle(): void
|
|||
$this->fail($e);
|
||||
throw $e;
|
||||
} finally {
|
||||
$this->application_deployment_queue->update([
|
||||
'finished_at' => Carbon::now()->toImmutable(),
|
||||
]);
|
||||
|
||||
if ($this->use_build_server) {
|
||||
$this->server = $this->build_server;
|
||||
} else {
|
||||
$this->write_deployment_configurations();
|
||||
// Wrap cleanup operations in try-catch to prevent exceptions from interfering
|
||||
// with Laravel's job failure handling and status updates
|
||||
try {
|
||||
$this->application_deployment_queue->update([
|
||||
'finished_at' => Carbon::now()->toImmutable(),
|
||||
]);
|
||||
} catch (Exception $e) {
|
||||
// Log but don't fail - finished_at is not critical
|
||||
\Log::warning('Failed to update finished_at for deployment '.$this->deployment_uuid.': '.$e->getMessage());
|
||||
}
|
||||
|
||||
$this->application_deployment_queue->addLogEntry("Gracefully shutting down build container: {$this->deployment_uuid}");
|
||||
$this->graceful_shutdown_container($this->deployment_uuid);
|
||||
try {
|
||||
if ($this->use_build_server) {
|
||||
$this->server = $this->build_server;
|
||||
} else {
|
||||
$this->write_deployment_configurations();
|
||||
}
|
||||
} catch (Exception $e) {
|
||||
// Log but don't fail - configuration writing errors shouldn't prevent status updates
|
||||
$this->application_deployment_queue->addLogEntry('Warning: Failed to write deployment configurations: '.$e->getMessage(), 'stderr');
|
||||
}
|
||||
|
||||
ServiceStatusChanged::dispatch(data_get($this->application, 'environment.project.team.id'));
|
||||
try {
|
||||
$this->application_deployment_queue->addLogEntry("Gracefully shutting down build container: {$this->deployment_uuid}");
|
||||
$this->graceful_shutdown_container($this->deployment_uuid);
|
||||
} catch (Exception $e) {
|
||||
// Log but don't fail - container cleanup errors are expected when container is already gone
|
||||
\Log::warning('Failed to shutdown container '.$this->deployment_uuid.': '.$e->getMessage());
|
||||
}
|
||||
|
||||
try {
|
||||
ServiceStatusChanged::dispatch(data_get($this->application, 'environment.project.team.id'));
|
||||
} catch (Exception $e) {
|
||||
// Log but don't fail - event dispatch errors shouldn't prevent status updates
|
||||
\Log::warning('Failed to dispatch ServiceStatusChanged for deployment '.$this->deployment_uuid.': '.$e->getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3798,10 +3820,8 @@ private function failDeployment(): void
|
|||
public function failed(Throwable $exception): void
|
||||
{
|
||||
$this->failDeployment();
|
||||
$this->application_deployment_queue->addLogEntry('Oops something is not okay, are you okay? 😢', 'stderr');
|
||||
if (str($exception->getMessage())->isNotEmpty()) {
|
||||
$this->application_deployment_queue->addLogEntry($exception->getMessage(), 'stderr');
|
||||
}
|
||||
$errorMessage = $exception->getMessage() ?: 'Unknown error occurred';
|
||||
$this->application_deployment_queue->addLogEntry("Deployment failed: {$errorMessage}", 'stderr');
|
||||
|
||||
if ($this->application->build_pack !== 'dockercompose') {
|
||||
$code = $exception->getCode();
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Enums\ApplicationDeploymentStatus;
|
||||
use App\Models\ApplicationDeploymentQueue;
|
||||
use App\Models\Server;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Contracts\Queue\ShouldBeEncrypted;
|
||||
|
|
@ -20,10 +22,51 @@ public function __construct(public Server $server) {}
|
|||
public function handle(): void
|
||||
{
|
||||
try {
|
||||
// Get all active deployments on this server
|
||||
$activeDeployments = ApplicationDeploymentQueue::where('server_id', $this->server->id)
|
||||
->whereIn('status', [
|
||||
ApplicationDeploymentStatus::IN_PROGRESS->value,
|
||||
ApplicationDeploymentStatus::QUEUED->value,
|
||||
])
|
||||
->pluck('deployment_uuid')
|
||||
->toArray();
|
||||
|
||||
\Log::info('CleanupHelperContainersJob - Active deployments', [
|
||||
'server' => $this->server->name,
|
||||
'active_deployment_uuids' => $activeDeployments,
|
||||
]);
|
||||
|
||||
$containers = instant_remote_process_with_timeout(['docker container ps --format \'{{json .}}\' | jq -s \'map(select(.Image | contains("'.config('constants.coolify.registry_url').'/coollabsio/coolify-helper")))\''], $this->server, false);
|
||||
$containerIds = collect(json_decode($containers))->pluck('ID');
|
||||
if ($containerIds->count() > 0) {
|
||||
foreach ($containerIds as $containerId) {
|
||||
$helperContainers = collect(json_decode($containers));
|
||||
|
||||
if ($helperContainers->count() > 0) {
|
||||
foreach ($helperContainers as $container) {
|
||||
$containerId = data_get($container, 'ID');
|
||||
$containerName = data_get($container, 'Names');
|
||||
|
||||
// Check if this container belongs to an active deployment
|
||||
$isActiveDeployment = false;
|
||||
foreach ($activeDeployments as $deploymentUuid) {
|
||||
if (str_contains($containerName, $deploymentUuid)) {
|
||||
$isActiveDeployment = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ($isActiveDeployment) {
|
||||
\Log::info('CleanupHelperContainersJob - Skipping active deployment container', [
|
||||
'container' => $containerName,
|
||||
'id' => $containerId,
|
||||
]);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
\Log::info('CleanupHelperContainersJob - Removing orphaned helper container', [
|
||||
'container' => $containerName,
|
||||
'id' => $containerId,
|
||||
]);
|
||||
|
||||
instant_remote_process_with_timeout(['docker container rm -f '.$containerId], $this->server, false);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -124,6 +124,51 @@ private function deleteApplicationPreview()
|
|||
$this->resource->delete();
|
||||
}
|
||||
|
||||
// Cancel any active deployments for this PR (same logic as API cancel_deployment)
|
||||
$activeDeployment = \App\Models\ApplicationDeploymentQueue::where('application_id', $application->id)
|
||||
->where('pull_request_id', $pull_request_id)
|
||||
->whereIn('status', [
|
||||
\App\Enums\ApplicationDeploymentStatus::QUEUED->value,
|
||||
\App\Enums\ApplicationDeploymentStatus::IN_PROGRESS->value,
|
||||
])
|
||||
->first();
|
||||
|
||||
if ($activeDeployment) {
|
||||
try {
|
||||
// Mark deployment as cancelled
|
||||
$activeDeployment->update([
|
||||
'status' => \App\Enums\ApplicationDeploymentStatus::CANCELLED_BY_USER->value,
|
||||
]);
|
||||
|
||||
// Add cancellation log entry
|
||||
$activeDeployment->addLogEntry('Deployment cancelled: Pull request closed.', 'stderr');
|
||||
|
||||
// Check if helper container exists and kill it
|
||||
$deployment_uuid = $activeDeployment->deployment_uuid;
|
||||
$checkCommand = "docker ps -a --filter name={$deployment_uuid} --format '{{.Names}}'";
|
||||
$containerExists = instant_remote_process([$checkCommand], $server);
|
||||
|
||||
if ($containerExists && str($containerExists)->trim()->isNotEmpty()) {
|
||||
instant_remote_process(["docker rm -f {$deployment_uuid}"], $server);
|
||||
$activeDeployment->addLogEntry('Deployment container stopped.');
|
||||
} else {
|
||||
$activeDeployment->addLogEntry('Helper container not yet started. Deployment will be cancelled when job checks status.');
|
||||
}
|
||||
|
||||
// Kill running process if process ID exists
|
||||
if ($activeDeployment->current_process_id) {
|
||||
try {
|
||||
$processKillCommand = "kill -9 {$activeDeployment->current_process_id}";
|
||||
instant_remote_process([$processKillCommand], $server);
|
||||
} catch (\Throwable $e) {
|
||||
// Process might already be gone
|
||||
}
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
// Silently handle errors during deployment cancellation
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
if ($server->isSwarm()) {
|
||||
instant_remote_process(["docker stack rm {$application->uuid}-{$pull_request_id}"], $server);
|
||||
|
|
@ -133,7 +178,7 @@ private function deleteApplicationPreview()
|
|||
}
|
||||
} catch (\Throwable $e) {
|
||||
// Log the error but don't fail the job
|
||||
ray('Error stopping preview containers: '.$e->getMessage());
|
||||
\Log::warning('Error stopping preview containers for application '.$application->uuid.', PR #'.$pull_request_id.': '.$e->getMessage());
|
||||
}
|
||||
|
||||
// Finally, force delete to trigger resource cleanup
|
||||
|
|
@ -156,7 +201,6 @@ private function stopPreviewContainers(array $containers, $server, int $timeout
|
|||
"docker stop --time=$timeout $containerList",
|
||||
"docker rm -f $containerList",
|
||||
];
|
||||
|
||||
instant_remote_process(
|
||||
command: $commands,
|
||||
server: $server,
|
||||
|
|
|
|||
|
|
@ -219,9 +219,22 @@ private function executeCommandWithProcess($command, $hidden, $customType, $appe
|
|||
$process_result = $process->wait();
|
||||
if ($process_result->exitCode() !== 0) {
|
||||
if (! $ignore_errors) {
|
||||
// Check if deployment was cancelled while command was running
|
||||
if (isset($this->application_deployment_queue)) {
|
||||
$this->application_deployment_queue->refresh();
|
||||
if ($this->application_deployment_queue->status === \App\Enums\ApplicationDeploymentStatus::CANCELLED_BY_USER->value) {
|
||||
throw new \RuntimeException('Deployment cancelled by user', 69420);
|
||||
}
|
||||
}
|
||||
|
||||
// Don't immediately set to FAILED - let the retry logic handle it
|
||||
// This prevents premature status changes during retryable SSH errors
|
||||
throw new \RuntimeException($process_result->errorOutput());
|
||||
$error = $process_result->errorOutput();
|
||||
if (empty($error)) {
|
||||
$error = $process_result->output() ?: 'Command failed with no error output';
|
||||
}
|
||||
$redactedCommand = $this->redact_sensitive_info($command);
|
||||
throw new \RuntimeException("Command execution failed (exit code {$process_result->exitCode()}): {$redactedCommand}\nError: {$error}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,13 +17,31 @@ function getCurrentApplicationContainerStatus(Server $server, int $id, ?int $pul
|
|||
if (! $server->isSwarm()) {
|
||||
$containers = instant_remote_process(["docker ps -a --filter='label=coolify.applicationId={$id}' --format '{{json .}}' "], $server);
|
||||
$containers = format_docker_command_output_to_json($containers);
|
||||
|
||||
$containers = $containers->map(function ($container) use ($pullRequestId, $includePullrequests) {
|
||||
$labels = data_get($container, 'Labels');
|
||||
if (! str($labels)->contains('coolify.pullRequestId=')) {
|
||||
data_set($container, 'Labels', $labels.",coolify.pullRequestId={$pullRequestId}");
|
||||
$containerName = data_get($container, 'Names');
|
||||
$hasPrLabel = str($labels)->contains('coolify.pullRequestId=');
|
||||
$prLabelValue = null;
|
||||
|
||||
if ($hasPrLabel) {
|
||||
preg_match('/coolify\.pullRequestId=(\d+)/', $labels, $matches);
|
||||
$prLabelValue = $matches[1] ?? null;
|
||||
}
|
||||
|
||||
// Treat pullRequestId=0 or missing label as base deployment (convention: 0 = no PR)
|
||||
$isBaseDeploy = ! $hasPrLabel || (int) $prLabelValue === 0;
|
||||
|
||||
// If we're looking for a specific PR and this is a base deployment, exclude it
|
||||
if ($pullRequestId !== null && $pullRequestId !== 0 && $isBaseDeploy) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// If this is a base deployment, include it when not filtering for PRs
|
||||
if ($isBaseDeploy) {
|
||||
return $container;
|
||||
}
|
||||
|
||||
if ($includePullrequests) {
|
||||
return $container;
|
||||
}
|
||||
|
|
@ -34,7 +52,9 @@ function getCurrentApplicationContainerStatus(Server $server, int $id, ?int $pul
|
|||
return null;
|
||||
});
|
||||
|
||||
return $containers->filter();
|
||||
$filtered = $containers->filter();
|
||||
|
||||
return $filtered;
|
||||
}
|
||||
|
||||
return $containers;
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue